nnetsauce
1from .base.base import Base 2from .base.baseRegressor import BaseRegressor 3from .boosting.adaBoostClassifier import AdaBoostClassifier 4from .custom.customClassifier import CustomClassifier 5from .custom.customRegressor import CustomRegressor 6from .datasets import Downloader 7from .deep.deepClassifier import DeepClassifier 8from .deep.deepRegressor import DeepRegressor 9from .deep.deepMTS import DeepMTS 10from .glm.glmClassifier import GLMClassifier 11from .glm.glmRegressor import GLMRegressor 12from .kernel.kernel import KernelRidge 13from .lazypredict.lazydeepClassifier import LazyDeepClassifier, LazyClassifier 14from .lazypredict.lazydeepRegressor import LazyDeepRegressor, LazyRegressor 15from .lazypredict.lazydeepClassifier import LazyDeepClassifier 16from .lazypredict.lazydeepRegressor import LazyDeepRegressor 17from .lazypredict.lazydeepMTS import LazyDeepMTS, LazyMTS 18from .mts.mts import MTS 19from .mts.classical import ClassicalMTS 20from .multitask.multitaskClassifier import MultitaskClassifier 21from .multitask.simplemultitaskClassifier import SimpleMultitaskClassifier 22from .neuralnet.neuralnetregression import NeuralNetRegressor 23from .neuralnet.neuralnetclassification import NeuralNetClassifier 24from .optimizers.optimizer import Optimizer 25from .predictioninterval import PredictionInterval 26from .quantile.quantileregression import QuantileRegressor 27from .quantile.quantileclassification import QuantileClassifier 28from .randombag.randomBagClassifier import RandomBagClassifier 29from .randombag.randomBagRegressor import RandomBagRegressor 30from .ridge.ridge import RidgeRegressor 31from .ridge2.ridge2Classifier import Ridge2Classifier 32from .ridge2.ridge2Regressor import Ridge2Regressor 33from .ridge2.ridge2MultitaskClassifier import Ridge2MultitaskClassifier 34from .rvfl.bayesianrvflRegressor import BayesianRVFLRegressor 35from .rvfl.bayesianrvfl2Regressor import BayesianRVFL2Regressor 36from .sampling import SubSampler 37from .updater 
import RegressorUpdater, ClassifierUpdater 38from .votingregressor import MedianVotingRegressor 39 40__all__ = [ 41 "AdaBoostClassifier", 42 "Base", 43 "BaseRegressor", 44 "BayesianRVFLRegressor", 45 "BayesianRVFL2Regressor", 46 "ClassicalMTS", 47 "CustomClassifier", 48 "CustomRegressor", 49 "DeepClassifier", 50 "DeepRegressor", 51 "DeepMTS", 52 "Downloader", 53 "GLMClassifier", 54 "GLMRegressor", 55 "KernelRidge", 56 "LazyClassifier", 57 "LazyRegressor", 58 "LazyDeepClassifier", 59 "LazyDeepRegressor", 60 "LazyMTS", 61 "LazyDeepMTS", 62 "MedianVotingRegressor", 63 "MTS", 64 "MultitaskClassifier", 65 "NeuralNetRegressor", 66 "NeuralNetClassifier", 67 "PredictionInterval", 68 "SimpleMultitaskClassifier", 69 "Optimizer", 70 "QuantileRegressor", 71 "QuantileClassifier", 72 "RandomBagRegressor", 73 "RandomBagClassifier", 74 "RegressorUpdater", 75 "ClassifierUpdater", 76 "RidgeRegressor", 77 "Ridge2Regressor", 78 "Ridge2Classifier", 79 "Ridge2MultitaskClassifier", 80 "SubSampler", 81]
class AdaBoostClassifier(Boosting, ClassifierMixin):
    """AdaBoost Classification (SAMME / SAMME.R) model class derived from class Boosting

    Parameters:

        obj: object
            any object containing a method fit (obj.fit()) and a method predict
            (obj.predict())

        n_estimators: int
            number of boosting iterations

        learning_rate: float
            learning rate of the boosting procedure

        n_hidden_features: int
            number of nodes in the hidden layer

        reg_lambda: float
            regularization parameter for weights

        reg_alpha: float
            controls the compromise between the l1 and l2 norms of the weights

        activation_name: str
            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

        a: float
            hyperparameter for 'prelu' or 'elu' activation function

        nodes_sim: str
            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
            'uniform'

        bias: boolean
            indicates if the hidden layer contains a bias term (True) or not
            (False)

        dropout: float
            regularization parameter; (random) percentage of nodes dropped out
            of the training

        direct_link: boolean
            indicates if the original predictors are included (True) in model's
            fitting or not (False)

        n_clusters: int
            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
                no clustering)

        cluster_encode: bool
            defines how the variable containing clusters is treated (default is one-hot)
            if `False`, then labels are used, without one-hot encoding

        type_clust: str
            type of clustering method: currently k-means ('kmeans') or Gaussian
            Mixture Model ('gmm')

        type_scaling: a tuple of 3 strings
            scaling methods for inputs, hidden layer, and clustering respectively
            (and when relevant).
            Currently available: standardization ('std') or MinMax scaling ('minmax')

        col_sample: float
            percentage of covariates randomly chosen for training

        row_sample: float
            percentage of rows chosen for training, by stratified bootstrapping

        seed: int
            reproducibility seed for nodes_sim=='uniform'

        verbose: int
            0 for no output, 1 for a progress bar (default is 1)

        method: str
            type of Adaboost method, 'SAMME' (discrete) or 'SAMME.R' (real)

        backend: str
            "cpu" or "gpu" or "tpu"

    Attributes:

        alpha_: list
            AdaBoost coefficients alpha_m

        base_learners_: dict
            a dictionary containing the base learners

    Examples:

    See also [https://github.com/Techtonique/nnetsauce/blob/master/examples/adaboost_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/adaboost_classification.py)

    ```python
    import nnetsauce as ns
    import numpy as np
    from sklearn.datasets import load_breast_cancer
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import train_test_split
    from sklearn import metrics
    from time import time

    breast_cancer = load_breast_cancer()
    Z = breast_cancer.data
    t = breast_cancer.target
    np.random.seed(123)
    X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)

    # SAMME.R
    clf = LogisticRegression(solver='liblinear', multi_class = 'ovr',
                            random_state=123)
    fit_obj = ns.AdaBoostClassifier(clf,
                                    n_hidden_features=int(11.22338867),
                                    direct_link=True,
                                    n_estimators=250, learning_rate=0.01126343,
                                    col_sample=0.72684326, row_sample=0.86429443,
                                    dropout=0.63078613, n_clusters=2,
                                    type_clust="gmm",
                                    verbose=1, seed = 123,
                                    method="SAMME.R")

    start = time()
    fit_obj.fit(X_train, y_train)
    print(f"Elapsed {time() - start}")

    start = time()
    print(fit_obj.score(X_test, y_test))
    print(f"Elapsed {time() - start}")

    preds = fit_obj.predict(X_test)

    print(metrics.classification_report(preds, y_test))

    ```

    """

    # construct the object -----

    def __init__(
        self,
        obj,
        n_estimators=10,
        learning_rate=0.1,
        n_hidden_features=1,
        reg_lambda=0,
        reg_alpha=0.5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=False,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        col_sample=1,
        row_sample=1,
        seed=123,
        verbose=1,
        method="SAMME",
        backend="cpu",
    ):
        self.type_fit = "classification"
        self.verbose = verbose
        self.method = method
        self.reg_lambda = reg_lambda
        self.reg_alpha = reg_alpha

        super().__init__(
            obj=obj,
            n_estimators=n_estimators,
            learning_rate=learning_rate,
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            col_sample=col_sample,
            row_sample=row_sample,
            seed=seed,
            backend=backend,
        )

        # fitted state; both containers are rebuilt on every call to fit()
        self.alpha_ = []
        self.base_learners_ = dict.fromkeys(range(n_estimators))

    def fit(self, X, y, sample_weight=None, **kwargs):
        """Fit Boosting model to training data (X, y).

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            sample_weight: array-like, shape = [n_samples], optional
                Initial observation weights. When None, every observation
                gets weight 1/n_samples. The array is copied, so the
                caller's data is never modified.

            **kwargs: additional parameters to be passed to
                    self.cook_training_set or self.obj.fit

        Returns:

            self: object
        """

        assert mx.is_factor(y), "y must contain only integers"

        assert self.method in (
            "SAMME",
            "SAMME.R",
        ), "`method` must be either 'SAMME' or 'SAMME.R'"

        assert (self.reg_lambda <= 1) & (
            self.reg_lambda >= 0
        ), "must have self.reg_lambda <= 1 & self.reg_lambda >= 0"

        assert (self.reg_alpha <= 1) & (
            self.reg_alpha >= 0
        ), "must have self.reg_alpha <= 1 & self.reg_alpha >= 0"

        # training
        n, _ = X.shape
        self.classes_ = np.unique(y)  # for compatibility with sklearn
        self.n_classes = len(self.classes_)
        self.n_classes_ = self.n_classes  # for compatibility with sklearn

        # Start from a clean fitted state: without this, refitting appends to
        # the previous alpha_ list and may keep stale/None base learners.
        self.alpha_ = []
        self.base_learners_ = {}

        if sample_weight is None:
            w_m = np.repeat(1.0 / n, n)
        else:
            # float copy: SAMME.R updates the weights in place, which must
            # neither touch the caller's array nor fail on integer weights
            w_m = np.array(sample_weight, dtype=float).ravel()

        base_learner = CustomClassifier(
            self.obj,
            n_hidden_features=self.n_hidden_features,
            activation_name=self.activation_name,
            a=self.a,
            nodes_sim=self.nodes_sim,
            bias=self.bias,
            dropout=self.dropout,
            direct_link=self.direct_link,
            n_clusters=self.n_clusters,
            type_clust=self.type_clust,
            type_scaling=self.type_scaling,
            col_sample=self.col_sample,
            row_sample=self.row_sample,
            seed=self.seed,
        )

        if self.verbose == 1:
            pbar = Progbar(self.n_estimators)

        # float64 machine epsilon (2.220446049250313e-16), used as a floor
        eps = np.finfo(float).eps

        if self.method == "SAMME":
            err_bound = 1 - 1 / self.n_classes
            # NOTE(review): alpha_[0] is a 1.0 placeholder, and predict_proba
            # reads alpha_[idx]; learner m is therefore weighted with the
            # coefficient computed for learner m-1 (1.0 for the first one).
            # Kept as in the original -- confirm this offset is intended.
            self.alpha_.append(1.0)
            y_true = np.asarray(y).ravel()

            for m in range(self.n_estimators):
                preds = base_learner.fit(
                    X, y, sample_weight=w_m.ravel(), **kwargs
                ).predict(X)

                self.base_learners_[m] = deepcopy(base_learner)

                misclassified = y_true != np.asarray(preds).ravel()

                # weighted error rate, floored to avoid log(inf) below
                err_m = max(float(np.sum(w_m * misclassified)), eps)

                if self.reg_lambda > 0:
                    # elastic-net style penalty on the observation weights
                    err_m += self.reg_lambda * (
                        (1 - self.reg_alpha) * 0.5 * np.sum(w_m**2)
                        + self.reg_alpha * np.sum(np.abs(w_m))
                    )

                err_m = min(err_m, err_bound)

                alpha_m = self.learning_rate * log(
                    (self.n_classes - 1) * (1 - err_m) / err_m
                )

                self.alpha_.append(alpha_m)

                # NOTE(review): the new weights depend only on the current
                # errors (the previous w_m is not multiplied in, unlike the
                # textbook SAMME update). Kept as in the original -- confirm.
                w_m = np.exp(alpha_m * misclassified)
                w_m /= w_m.sum()

                base_learner.set_params(seed=self.seed + (m + 1) * 1000)

                if self.verbose == 1:
                    pbar.update(m)

        else:  # self.method == "SAMME.R"
            Y = mo.one_hot_encode2(y, self.n_classes)

            for m in range(self.n_estimators):
                probs = base_learner.fit(
                    X, y, sample_weight=w_m.ravel(), **kwargs
                ).predict_proba(X)

                # keep probabilities away from 0 so that the log is finite
                np.clip(a=probs, a_min=eps, a_max=1.0, out=probs)

                self.base_learners_[m] = deepcopy(base_learner)

                w_m *= np.exp(
                    -1.0
                    * self.learning_rate
                    * (1.0 - 1.0 / self.n_classes)
                    * xlogy(Y, probs).sum(axis=1)
                )

                w_m /= np.sum(w_m)

                base_learner.set_params(seed=self.seed + (m + 1) * 1000)

                if self.verbose == 1:
                    pbar.update(m)

        if self.verbose == 1:
            pbar.update(self.n_estimators)

        return self

    def predict(self, X, **kwargs):
        """Predict test data X.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Test vectors, where n_samples is the number
                of samples and n_features is the number of features.

            **kwargs: additional parameters to be passed to
                  self.cook_test_set

        Returns:

            model predictions: {array-like}
                column index of the highest probability returned by
                predict_proba, for each row of X
        """
        return self.predict_proba(X, **kwargs).argmax(axis=1)

    def predict_proba(self, X, **kwargs):
        """Predict probabilities for test data X.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Test vectors, where n_samples is the number
                of samples and n_features is the number of features.

            **kwargs: additional parameters to be passed to
                  self.cook_test_set

        Returns:

            probability estimates for test data: {array-like}

        """

        def _normalize(scores):
            # squash the ensemble scores with expit, then rescale each row
            # so that it sums to 1
            squashed = expit(scores)
            return squashed / squashed.sum(axis=1)[:, None]

        if self.method == "SAMME":
            ensemble_learner = np.zeros((X.shape[0], self.n_classes))

            # weighted vote of the one-hot encoded class predictions
            for idx, base_learner in self.base_learners_.items():
                preds = base_learner.predict(X, **kwargs)

                ensemble_learner += self.alpha_[idx] * mo.one_hot_encode2(
                    preds, self.n_classes
                )

            return _normalize(ensemble_learner)

        # any other method is handled as "SAMME.R"
        ensemble_learner = 0

        for idx, base_learner in self.base_learners_.items():
            probs = base_learner.predict_proba(X, **kwargs)

            # keep probabilities away from 0 so that the log is finite
            np.clip(a=probs, a_min=2.220446049250313e-16, a_max=1.0, out=probs)

            log_preds_proba = np.log(probs)

            # row-centered log-probabilities
            ensemble_learner += log_preds_proba - log_preds_proba.mean(axis=1)[:, None]

        ensemble_learner *= self.n_classes - 1

        return _normalize(ensemble_learner)
AdaBoost Classification (SAMME) model class derived from class Boosting
Parameters:
obj: object
any object containing a method fit (obj.fit()) and a method predict
(obj.predict())
n_estimators: int
number of boosting iterations
learning_rate: float
learning rate of the boosting procedure
n_hidden_features: int
number of nodes in the hidden layer
reg_lambda: float
regularization parameter for weights
reg_alpha: float
controls the compromise between the l1 and l2 norms of the weights
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
col_sample: float
percentage of covariates randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
seed: int
reproducibility seed for nodes_sim=='uniform'
verbose: int
0 for no output, 1 for a progress bar (default is 1)
method: str
type of Adaboost method, 'SAMME' (discrete) or 'SAMME.R' (real)
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
alpha_: list
AdaBoost coefficients alpha_m
base_learners_: dict
a dictionary containing the base learners
Examples:
See also https://github.com/Techtonique/nnetsauce/blob/master/examples/adaboost_classification.py
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time
breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target
np.random.seed(123)
X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)
# SAMME.R
clf = LogisticRegression(solver='liblinear', multi_class = 'ovr',
random_state=123)
fit_obj = ns.AdaBoostClassifier(clf,
n_hidden_features=int(11.22338867),
direct_link=True,
n_estimators=250, learning_rate=0.01126343,
col_sample=0.72684326, row_sample=0.86429443,
dropout=0.63078613, n_clusters=2,
type_clust="gmm",
verbose=1, seed = 123,
method="SAMME.R")
start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")
start = time()
print(fit_obj.score(X_test, y_test))
print(f"Elapsed {time() - start}")
preds = fit_obj.predict(X_test)
print(metrics.classification_report(preds, y_test))
def fit(self, X, y, sample_weight=None, **kwargs):
    """Fit Boosting model to training data (X, y).

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features.

        y: array-like, shape = [n_samples]
            Target values.

        sample_weight: array-like, shape = [n_samples], optional
            Initial observation weights. When None, every observation
            gets weight 1/n_samples. The array is copied, so the
            caller's data is never modified.

        **kwargs: additional parameters to be passed to
                self.cook_training_set or self.obj.fit

    Returns:

        self: object
    """

    assert mx.is_factor(y), "y must contain only integers"

    assert self.method in (
        "SAMME",
        "SAMME.R",
    ), "`method` must be either 'SAMME' or 'SAMME.R'"

    assert (self.reg_lambda <= 1) & (
        self.reg_lambda >= 0
    ), "must have self.reg_lambda <= 1 & self.reg_lambda >= 0"

    assert (self.reg_alpha <= 1) & (
        self.reg_alpha >= 0
    ), "must have self.reg_alpha <= 1 & self.reg_alpha >= 0"

    # training
    n, _ = X.shape
    self.classes_ = np.unique(y)  # for compatibility with sklearn
    self.n_classes = len(self.classes_)
    self.n_classes_ = self.n_classes  # for compatibility with sklearn

    # Start from a clean fitted state: without this, refitting appends to
    # the previous alpha_ list and may keep stale/None base learners.
    self.alpha_ = []
    self.base_learners_ = {}

    if sample_weight is None:
        w_m = np.repeat(1.0 / n, n)
    else:
        # float copy: SAMME.R updates the weights in place, which must
        # neither touch the caller's array nor fail on integer weights
        w_m = np.array(sample_weight, dtype=float).ravel()

    base_learner = CustomClassifier(
        self.obj,
        n_hidden_features=self.n_hidden_features,
        activation_name=self.activation_name,
        a=self.a,
        nodes_sim=self.nodes_sim,
        bias=self.bias,
        dropout=self.dropout,
        direct_link=self.direct_link,
        n_clusters=self.n_clusters,
        type_clust=self.type_clust,
        type_scaling=self.type_scaling,
        col_sample=self.col_sample,
        row_sample=self.row_sample,
        seed=self.seed,
    )

    if self.verbose == 1:
        pbar = Progbar(self.n_estimators)

    # float64 machine epsilon (2.220446049250313e-16), used as a floor
    eps = np.finfo(float).eps

    if self.method == "SAMME":
        err_bound = 1 - 1 / self.n_classes
        # NOTE(review): alpha_[0] is a 1.0 placeholder, and predict_proba
        # reads alpha_[idx]; learner m is therefore weighted with the
        # coefficient computed for learner m-1 (1.0 for the first one).
        # Kept as in the original -- confirm this offset is intended.
        self.alpha_.append(1.0)
        y_true = np.asarray(y).ravel()

        for m in range(self.n_estimators):
            preds = base_learner.fit(
                X, y, sample_weight=w_m.ravel(), **kwargs
            ).predict(X)

            self.base_learners_[m] = deepcopy(base_learner)

            misclassified = y_true != np.asarray(preds).ravel()

            # weighted error rate, floored to avoid log(inf) below
            err_m = max(float(np.sum(w_m * misclassified)), eps)

            if self.reg_lambda > 0:
                # elastic-net style penalty on the observation weights
                err_m += self.reg_lambda * (
                    (1 - self.reg_alpha) * 0.5 * np.sum(w_m**2)
                    + self.reg_alpha * np.sum(np.abs(w_m))
                )

            err_m = min(err_m, err_bound)

            alpha_m = self.learning_rate * log(
                (self.n_classes - 1) * (1 - err_m) / err_m
            )

            self.alpha_.append(alpha_m)

            # NOTE(review): the new weights depend only on the current
            # errors (the previous w_m is not multiplied in, unlike the
            # textbook SAMME update). Kept as in the original -- confirm.
            w_m = np.exp(alpha_m * misclassified)
            w_m /= w_m.sum()

            base_learner.set_params(seed=self.seed + (m + 1) * 1000)

            if self.verbose == 1:
                pbar.update(m)

    else:  # self.method == "SAMME.R"
        Y = mo.one_hot_encode2(y, self.n_classes)

        for m in range(self.n_estimators):
            probs = base_learner.fit(
                X, y, sample_weight=w_m.ravel(), **kwargs
            ).predict_proba(X)

            # keep probabilities away from 0 so that the log is finite
            np.clip(a=probs, a_min=eps, a_max=1.0, out=probs)

            self.base_learners_[m] = deepcopy(base_learner)

            w_m *= np.exp(
                -1.0
                * self.learning_rate
                * (1.0 - 1.0 / self.n_classes)
                * xlogy(Y, probs).sum(axis=1)
            )

            w_m /= np.sum(w_m)

            base_learner.set_params(seed=self.seed + (m + 1) * 1000)

            if self.verbose == 1:
                pbar.update(m)

    if self.verbose == 1:
        pbar.update(self.n_estimators)

    return self
Fit Boosting model to training data (X, y).
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
def predict(self, X, **kwargs):
    """Predict test data X.

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Input vectors, where n_samples is the number
            of samples and n_features is the number of features.

        **kwargs: additional parameters, forwarded unchanged to
              self.predict_proba

    Returns:

        model predictions: {array-like}
            for each row of X, the column index holding the largest value
            returned by self.predict_proba
    """
    class_probabilities = self.predict_proba(X, **kwargs)
    return class_probabilities.argmax(axis=1)
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Test vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
def predict_proba(self, X, **kwargs):
    """Predict probabilities for test data X.

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Input vectors, where n_samples is the number
            of samples and n_features is the number of features.

        **kwargs: additional parameters, forwarded to each base learner's
              predict / predict_proba

    Returns:

        probability estimates for test data: {array-like}

    """

    def _rows_to_unit_sum(scores):
        # squash the ensemble scores with expit, then divide every row by
        # its own sum
        squashed = expit(scores)
        row_totals = squashed.sum(axis=1)
        return squashed / row_totals[:, None]

    if self.method == "SAMME":
        votes = np.zeros((X.shape[0], self.n_classes))

        # accumulate alpha-weighted one-hot encodings of the hard predictions
        for key, learner in self.base_learners_.items():
            labels = learner.predict(X, **kwargs)
            votes += self.alpha_[key] * mo.one_hot_encode2(labels, self.n_classes)

        return _rows_to_unit_sum(votes)

    # every other value of self.method is treated as "SAMME.R"
    scores = 0

    for key, learner in self.base_learners_.items():
        probs = learner.predict_proba(X, **kwargs)

        # clipped in place so that the logarithm below stays finite
        np.clip(a=probs, a_min=2.220446049250313e-16, a_max=1.0, out=probs)

        log_probs = np.log(probs)
        row_means = log_probs.mean(axis=1)[:, None]

        scores += log_probs - row_means

    scores *= self.n_classes - 1

    return _rows_to_unit_sum(scores)
Predict probabilities for test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Test vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
probability estimates for test data: {array-like}
46class Base(BaseEstimator): 47 """Base model from which all the other classes inherit. 48 49 This class contains the most important data preprocessing/feature engineering methods. 50 51 Parameters: 52 53 n_hidden_features: int 54 number of nodes in the hidden layer 55 56 activation_name: str 57 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu' 58 59 a: float 60 hyperparameter for 'prelu' or 'elu' activation function 61 62 nodes_sim: str 63 type of simulation for hidden layer nodes: 'sobol', 'hammersley', 'halton', 64 'uniform' 65 66 bias: boolean 67 indicates if the hidden layer contains a bias term (True) or 68 not (False) 69 70 dropout: float 71 regularization parameter; (random) percentage of nodes dropped out 72 of the training 73 74 direct_link: boolean 75 indicates if the original features are included (True) in model's 76 fitting or not (False) 77 78 n_clusters: int 79 number of clusters for type_clust='kmeans' or type_clust='gmm' 80 clustering (could be 0: no clustering) 81 82 cluster_encode: bool 83 defines how the variable containing clusters is treated (default is one-hot); 84 if `False`, then labels are used, without one-hot encoding 85 86 type_clust: str 87 type of clustering method: currently k-means ('kmeans') or Gaussian 88 Mixture Model ('gmm') 89 90 type_scaling: a tuple of 3 strings 91 scaling methods for inputs, hidden layer, and clustering respectively 92 (and when relevant). 
93 Currently available: standardization ('std') or MinMax scaling ('minmax') or robust scaling ('robust') or max absolute scaling ('maxabs') 94 95 col_sample: float 96 percentage of features randomly chosen for training 97 98 row_sample: float 99 percentage of rows chosen for training, by stratified bootstrapping 100 101 seed: int 102 reproducibility seed for nodes_sim=='uniform', clustering and dropout 103 104 backend: str 105 "cpu" or "gpu" or "tpu" 106 107 """ 108 109 # construct the object ----- 110 111 def __init__( 112 self, 113 n_hidden_features=5, 114 activation_name="relu", 115 a=0.01, 116 nodes_sim="sobol", 117 bias=True, 118 dropout=0, 119 direct_link=True, 120 n_clusters=2, 121 cluster_encode=True, 122 type_clust="kmeans", 123 type_scaling=("std", "std", "std"), 124 col_sample=1, 125 row_sample=1, 126 seed=123, 127 backend="cpu", 128 ): 129 # input checks ----- 130 131 sys_platform = platform.system() 132 133 if (sys_platform == "Windows") and (backend in ("gpu", "tpu")): 134 warnings.warn("No GPU/TPU computing on Windows yet, backend set to 'cpu'") 135 backend = "cpu" 136 137 assert activation_name in ( 138 "relu", 139 "tanh", 140 "sigmoid", 141 "prelu", 142 "elu", 143 ), "'activation_name' must be in ('relu', 'tanh', 'sigmoid','prelu', 'elu')" 144 145 assert nodes_sim in ( 146 "sobol", 147 "hammersley", 148 "uniform", 149 "halton", 150 ), "'nodes_sim' must be in ('sobol', 'hammersley', 'uniform', 'halton')" 151 152 assert type_clust in ( 153 "kmeans", 154 "gmm", 155 ), "'type_clust' must be in ('kmeans', 'gmm')" 156 157 assert (len(type_scaling) == 3) & all( 158 type_scaling[i] in ("minmax", "std", "robust", "maxabs") 159 for i in range(len(type_scaling)) 160 ), "'type_scaling' must have length 3, and available scaling methods are 'minmax' scaling, standardization ('std'), robust scaling ('robust') and max absolute ('maxabs')" 161 162 assert (col_sample >= 0) & ( 163 col_sample <= 1 164 ), "'col_sample' must be comprised between 0 and 1 (both 
included)" 165 166 assert backend in ( 167 "cpu", 168 "gpu", 169 "tpu", 170 ), "must have 'backend' in ('cpu', 'gpu', 'tpu')" 171 172 self.n_hidden_features = n_hidden_features 173 self.activation_name = activation_name 174 self.a = a 175 self.nodes_sim = nodes_sim 176 self.bias = bias 177 self.seed = seed 178 self.backend = backend 179 self.dropout = dropout 180 self.direct_link = direct_link 181 self.cluster_encode = cluster_encode 182 self.type_clust = type_clust 183 self.type_scaling = type_scaling 184 self.col_sample = col_sample 185 self.row_sample = row_sample 186 self.n_clusters = n_clusters 187 if isinstance(self, RegressorMixin): 188 self.type_fit = "regression" 189 elif isinstance(self, ClassifierMixin): 190 self.type_fit = "classification" 191 self.subsampler_ = None 192 self.index_col_ = None 193 self.index_row_ = True 194 self.clustering_obj_ = None 195 self.clustering_scaler_ = None 196 self.nn_scaler_ = None 197 self.scaler_ = None 198 self.encoder_ = None 199 self.W_ = None 200 self.X_ = None 201 self.y_ = None 202 self.y_mean_ = None 203 self.beta_ = None 204 205 # activation function ----- 206 if sys_platform in ("Linux", "Darwin"): 207 activation_options = { 208 "relu": ac.relu if (self.backend == "cpu") else jnn.relu, 209 "tanh": np.tanh if (self.backend == "cpu") else jnp.tanh, 210 "sigmoid": (ac.sigmoid if (self.backend == "cpu") else jnn.sigmoid), 211 "prelu": partial(ac.prelu, a=a), 212 "elu": ( 213 partial(ac.elu, a=a) 214 if (self.backend == "cpu") 215 else partial(jnn.elu, a=a) 216 ), 217 } 218 else: # on Windows currently, no JAX 219 activation_options = { 220 "relu": (ac.relu if (self.backend == "cpu") else NotImplementedError), 221 "tanh": (np.tanh if (self.backend == "cpu") else NotImplementedError), 222 "sigmoid": ( 223 ac.sigmoid if (self.backend == "cpu") else NotImplementedError 224 ), 225 "prelu": partial(ac.prelu, a=a), 226 "elu": ( 227 partial(ac.elu, a=a) 228 if (self.backend == "cpu") 229 else NotImplementedError 230 ), 231 
} 232 self.activation_func = activation_options[activation_name] 233 234 # "preprocessing" methods to be inherited ----- 235 236 def encode_clusters(self, X=None, predict=False, scaler=None, **kwargs): # 237 """Create new covariates with kmeans or GMM clustering 238 239 Parameters: 240 241 X: {array-like}, shape = [n_samples, n_features] 242 Training vectors, where n_samples is the number 243 of samples and n_features is the number of features. 244 245 predict: boolean 246 is False on training set and True on test set 247 248 scaler: {object} of class StandardScaler, MinMaxScaler, RobustScaler or MaxAbsScaler 249 if scaler has already been fitted on training data (online training), it can be passed here 250 251 **kwargs: 252 additional parameters to be passed to the 253 clustering method 254 255 Returns: 256 257 Clusters' matrix, one-hot encoded: {array-like} 258 259 """ 260 261 np.random.seed(self.seed) 262 263 if X is None: 264 X = self.X_ 265 266 if isinstance(X, pd.DataFrame): 267 X = copy.deepcopy(X.values.astype(float)) 268 269 if len(X.shape) == 1: 270 X = X.reshape(1, -1) 271 272 if predict is False: # encode training set 273 274 # scale input data before clustering 275 self.clustering_scaler_, scaled_X = mo.scale_covariates( 276 X, choice=self.type_scaling[2], scaler=self.clustering_scaler_ 277 ) 278 279 self.clustering_obj_, X_clustered = mo.cluster_covariates( 280 scaled_X, 281 self.n_clusters, 282 self.seed, 283 type_clust=self.type_clust, 284 **kwargs 285 ) 286 287 if self.cluster_encode == True: 288 return mo.one_hot_encode(X_clustered, self.n_clusters).astype( 289 np.float16 290 ) 291 292 return X_clustered.astype(np.float16) 293 294 # if predict == True, encode test set 295 X_clustered = self.clustering_obj_.predict(self.clustering_scaler_.transform(X)) 296 297 if self.cluster_encode == True: 298 return mo.one_hot_encode(X_clustered, self.n_clusters).astype(np.float16) 299 300 return X_clustered.astype(np.float16) 301 302 def create_layer(self, 
scaled_X, W=None): 303 """Create hidden layer. 304 305 Parameters: 306 307 scaled_X: {array-like}, shape = [n_samples, n_features] 308 Training vectors, where n_samples is the number 309 of samples and n_features is the number of features 310 311 W: {array-like}, shape = [n_features, hidden_features] 312 if provided, constructs the hidden layer with W; otherwise computed internally 313 314 Returns: 315 316 Hidden layer matrix: {array-like} 317 318 """ 319 320 n_features = scaled_X.shape[1] 321 322 # hash_sim = { 323 # "sobol": generate_sobol, 324 # "hammersley": generate_hammersley, 325 # "uniform": generate_uniform, 326 # "halton": generate_halton 327 # } 328 329 if self.bias is False: # no bias term in the hidden layer 330 if W is None: 331 if self.nodes_sim == "sobol": 332 self.W_ = generate_sobol( 333 n_dims=n_features, 334 n_points=self.n_hidden_features, 335 seed=self.seed, 336 ) 337 elif self.nodes_sim == "hammersley": 338 self.W_ = generate_hammersley( 339 n_dims=n_features, 340 n_points=self.n_hidden_features, 341 seed=self.seed, 342 ) 343 elif self.nodes_sim == "uniform": 344 self.W_ = generate_uniform( 345 n_dims=n_features, 346 n_points=self.n_hidden_features, 347 seed=self.seed, 348 ) 349 else: 350 self.W_ = generate_halton( 351 n_dims=n_features, 352 n_points=self.n_hidden_features, 353 seed=self.seed, 354 ) 355 356 # self.W_ = hash_sim[self.nodes_sim]( 357 # n_dims=n_features, 358 # n_points=self.n_hidden_features, 359 # seed=self.seed, 360 # ) 361 362 assert ( 363 scaled_X.shape[1] == self.W_.shape[0] 364 ), "check dimensions of covariates X and matrix W" 365 366 return mo.dropout( 367 x=self.activation_func( 368 mo.safe_sparse_dot(a=scaled_X, b=self.W_, backend=self.backend) 369 ), 370 drop_prob=self.dropout, 371 seed=self.seed, 372 ) 373 374 # W is not none 375 assert ( 376 scaled_X.shape[1] == W.shape[0] 377 ), "check dimensions of covariates X and matrix W" 378 379 # self.W_ = W 380 return mo.dropout( 381 x=self.activation_func( 382 
mo.safe_sparse_dot(a=scaled_X, b=W, backend=self.backend) 383 ), 384 drop_prob=self.dropout, 385 seed=self.seed, 386 ) 387 388 # with bias term in the hidden layer 389 if W is None: 390 n_features_1 = n_features + 1 391 392 if self.nodes_sim == "sobol": 393 self.W_ = generate_sobol( 394 n_dims=n_features_1, 395 n_points=self.n_hidden_features, 396 seed=self.seed, 397 ) 398 elif self.nodes_sim == "hammersley": 399 self.W_ = generate_hammersley( 400 n_dims=n_features_1, 401 n_points=self.n_hidden_features, 402 seed=self.seed, 403 ) 404 elif self.nodes_sim == "uniform": 405 self.W_ = generate_uniform( 406 n_dims=n_features_1, 407 n_points=self.n_hidden_features, 408 seed=self.seed, 409 ) 410 else: 411 self.W_ = generate_halton( 412 n_dims=n_features_1, 413 n_points=self.n_hidden_features, 414 seed=self.seed, 415 ) 416 417 # self.W_ = hash_sim[self.nodes_sim]( 418 # n_dims=n_features_1, 419 # n_points=self.n_hidden_features, 420 # seed=self.seed, 421 # ) 422 423 return mo.dropout( 424 x=self.activation_func( 425 mo.safe_sparse_dot( 426 a=mo.cbind( 427 np.ones(scaled_X.shape[0]), 428 scaled_X, 429 backend=self.backend, 430 ), 431 b=self.W_, 432 backend=self.backend, 433 ) 434 ), 435 drop_prob=self.dropout, 436 seed=self.seed, 437 ) 438 439 # W is not None 440 # self.W_ = W 441 return mo.dropout( 442 x=self.activation_func( 443 mo.safe_sparse_dot( 444 a=mo.cbind( 445 np.ones(scaled_X.shape[0]), 446 scaled_X, 447 backend=self.backend, 448 ), 449 b=W, 450 backend=self.backend, 451 ) 452 ), 453 drop_prob=self.dropout, 454 seed=self.seed, 455 ) 456 457 def cook_training_set(self, y=None, X=None, W=None, **kwargs): 458 """Create new hidden features for training set, with hidden layer, center the response. 
459 460 Parameters: 461 462 y: array-like, shape = [n_samples] 463 Target values 464 465 X: {array-like}, shape = [n_samples, n_features] 466 Training vectors, where n_samples is the number 467 of samples and n_features is the number of features 468 469 W: {array-like}, shape = [n_features, hidden_features] 470 if provided, constructs the hidden layer via W 471 472 Returns: 473 474 (centered response, direct link + hidden layer matrix): {tuple} 475 476 """ 477 478 # either X and y are stored or not 479 # assert ((y is None) & (X is None)) | ((y is not None) & (X is not None)) 480 if self.n_hidden_features > 0: # has a hidden layer 481 assert ( 482 len(self.type_scaling) >= 2 483 ), "must have len(self.type_scaling) >= 2 when self.n_hidden_features > 0" 484 485 if X is None: 486 487 if self.col_sample == 1: 488 input_X = self.X_ 489 else: 490 n_features = self.X_.shape[1] 491 new_n_features = int(np.ceil(n_features * self.col_sample)) 492 assert ( 493 new_n_features >= 1 494 ), "check class attribute 'col_sample' and the number of covariates provided for X" 495 np.random.seed(self.seed) 496 index_col = np.random.choice( 497 range(n_features), size=new_n_features, replace=False 498 ) 499 self.index_col_ = index_col 500 input_X = self.X_[:, self.index_col_] 501 502 else: # X is not None # keep X vs self.X_ 503 504 if isinstance(X, pd.DataFrame): 505 X = copy.deepcopy(X.values.astype(float)) 506 507 if self.col_sample == 1: 508 input_X = X 509 else: 510 n_features = X.shape[1] 511 new_n_features = int(np.ceil(n_features * self.col_sample)) 512 assert ( 513 new_n_features >= 1 514 ), "check class attribute 'col_sample' and the number of covariates provided for X" 515 np.random.seed(self.seed) 516 index_col = np.random.choice( 517 range(n_features), size=new_n_features, replace=False 518 ) 519 self.index_col_ = index_col 520 input_X = X[:, self.index_col_] 521 522 if self.n_clusters <= 0: 523 # data without any clustering: self.n_clusters is None ----- 524 525 if 
self.n_hidden_features > 0: # with hidden layer 526 527 self.nn_scaler_, scaled_X = mo.scale_covariates( 528 input_X, choice=self.type_scaling[1], scaler=self.nn_scaler_ 529 ) 530 Phi_X = ( 531 self.create_layer(scaled_X) 532 if W is None 533 else self.create_layer(scaled_X, W=W) 534 ) 535 Z = ( 536 mo.cbind(input_X, Phi_X, backend=self.backend) 537 if self.direct_link is True 538 else Phi_X 539 ) 540 self.scaler_, scaled_Z = mo.scale_covariates( 541 Z, choice=self.type_scaling[0], scaler=self.scaler_ 542 ) 543 else: # no hidden layer 544 Z = input_X 545 self.scaler_, scaled_Z = mo.scale_covariates( 546 Z, choice=self.type_scaling[0], scaler=self.scaler_ 547 ) 548 549 else: 550 551 # data with clustering: self.n_clusters is not None ----- # keep 552 553 augmented_X = mo.cbind( 554 input_X, 555 self.encode_clusters(input_X, **kwargs), 556 backend=self.backend, 557 ) 558 559 if self.n_hidden_features > 0: # with hidden layer 560 561 self.nn_scaler_, scaled_X = mo.scale_covariates( 562 augmented_X, 563 choice=self.type_scaling[1], 564 scaler=self.nn_scaler_, 565 ) 566 Phi_X = ( 567 self.create_layer(scaled_X) 568 if W is None 569 else self.create_layer(scaled_X, W=W) 570 ) 571 Z = ( 572 mo.cbind(augmented_X, Phi_X, backend=self.backend) 573 if self.direct_link is True 574 else Phi_X 575 ) 576 self.scaler_, scaled_Z = mo.scale_covariates( 577 Z, choice=self.type_scaling[0], scaler=self.scaler_ 578 ) 579 else: # no hidden layer 580 Z = augmented_X 581 self.scaler_, scaled_Z = mo.scale_covariates( 582 Z, choice=self.type_scaling[0], scaler=self.scaler_ 583 ) 584 585 # Returning model inputs ----- 586 if mx.is_factor(y) is False: # regression 587 # center y 588 if y is None: 589 self.y_mean_, centered_y = mo.center_response(self.y_) 590 else: 591 self.y_mean_, centered_y = mo.center_response(y) 592 593 # y is subsampled 594 if self.row_sample < 1: 595 n, p = Z.shape 596 597 self.subsampler_ = ( 598 SubSampler(y=self.y_, row_sample=self.row_sample, seed=self.seed) 599 if y 
is None 600 else SubSampler(y=y, row_sample=self.row_sample, seed=self.seed) 601 ) 602 603 self.index_row_ = self.subsampler_.subsample() 604 605 n_row_sample = len(self.index_row_) 606 # regression 607 return ( 608 centered_y[self.index_row_].reshape(n_row_sample), 609 self.scaler_.transform( 610 Z[self.index_row_, :].reshape(n_row_sample, p) 611 ), 612 ) 613 # y is not subsampled 614 # regression 615 return (centered_y, self.scaler_.transform(Z)) 616 617 # classification 618 # y is subsampled 619 if self.row_sample < 1: 620 n, p = Z.shape 621 622 self.subsampler_ = ( 623 SubSampler(y=self.y_, row_sample=self.row_sample, seed=self.seed) 624 if y is None 625 else SubSampler(y=y, row_sample=self.row_sample, seed=self.seed) 626 ) 627 628 self.index_row_ = self.subsampler_.subsample() 629 630 n_row_sample = len(self.index_row_) 631 # classification 632 return ( 633 y[self.index_row_].reshape(n_row_sample), 634 self.scaler_.transform(Z[self.index_row_, :].reshape(n_row_sample, p)), 635 ) 636 # y is not subsampled 637 # classification 638 return (y, self.scaler_.transform(Z)) 639 640 def cook_test_set(self, X, **kwargs): 641 """Transform data from test set, with hidden layer. 
642 643 Parameters: 644 645 X: {array-like}, shape = [n_samples, n_features] 646 Training vectors, where n_samples is the number 647 of samples and n_features is the number of features 648 649 **kwargs: additional parameters to be passed to self.encode_cluster 650 651 Returns: 652 653 Transformed test set : {array-like} 654 """ 655 656 if isinstance(X, pd.DataFrame): 657 X = copy.deepcopy(X.values.astype(float)) 658 659 if len(X.shape) == 1: 660 X = X.reshape(1, -1) 661 662 if ( 663 self.n_clusters == 0 664 ): # data without clustering: self.n_clusters is None ----- 665 if self.n_hidden_features > 0: 666 # if hidden layer 667 scaled_X = ( 668 self.nn_scaler_.transform(X) 669 if (self.col_sample == 1) 670 else self.nn_scaler_.transform(X[:, self.index_col_]) 671 ) 672 Phi_X = self.create_layer(scaled_X, self.W_) 673 if self.direct_link == True: 674 return self.scaler_.transform( 675 mo.cbind(scaled_X, Phi_X, backend=self.backend) 676 ) 677 # when self.direct_link == False 678 return self.scaler_.transform(Phi_X) 679 # if no hidden layer # self.n_hidden_features == 0 680 return self.scaler_.transform(X) 681 682 # data with clustering: self.n_clusters > 0 ----- 683 if self.col_sample == 1: 684 predicted_clusters = self.encode_clusters(X=X, predict=True, **kwargs) 685 augmented_X = mo.cbind(X, predicted_clusters, backend=self.backend) 686 else: 687 predicted_clusters = self.encode_clusters( 688 X=X[:, self.index_col_], predict=True, **kwargs 689 ) 690 augmented_X = mo.cbind( 691 X[:, self.index_col_], predicted_clusters, backend=self.backend 692 ) 693 694 if self.n_hidden_features > 0: # if hidden layer 695 scaled_X = self.nn_scaler_.transform(augmented_X) 696 Phi_X = self.create_layer(scaled_X, self.W_) 697 if self.direct_link == True: 698 return self.scaler_.transform( 699 mo.cbind(augmented_X, Phi_X, backend=self.backend) 700 ) 701 return self.scaler_.transform(Phi_X) 702 703 # if no hidden layer 704 return self.scaler_.transform(augmented_X) 705 706 def 
cross_val_score( 707 self, 708 X, 709 y, 710 cv=5, 711 scoring="accuracy", 712 random_state=42, 713 n_jobs=-1, 714 epsilon=0.5, 715 penalized=True, 716 objective="abs", 717 **kwargs 718 ): 719 """ 720 Penalized Cross-validation score for a model. 721 722 Parameters: 723 724 X: {array-like}, shape = [n_samples, n_features] 725 Training vectors, where n_samples is the number 726 of samples and n_features is the number of features 727 728 y: array-like, shape = [n_samples] 729 Target values 730 731 X_test: {array-like}, shape = [n_samples, n_features] 732 Test vectors, where n_samples is the number 733 of samples and n_features is the number of features 734 735 y_test: array-like, shape = [n_samples] 736 Target values 737 738 cv: int 739 Number of folds 740 741 scoring: str 742 Scoring metric 743 744 random_state: int 745 Random state 746 747 n_jobs: int 748 Number of jobs to run in parallel 749 750 epsilon: float 751 Penalty parameter 752 753 penalized: bool 754 Whether to obtain penalized cross-validation score or not 755 756 objective: str 757 'abs': Minimize the absolute difference between cross-validation score and validation score 758 'relative': Minimize the relative difference between cross-validation score and validation score 759 Returns: 760 761 A namedtuple with the following fields: 762 - cv_score: float 763 cross-validation score 764 - val_score: float 765 validation score 766 - penalized_score: float 767 penalized cross-validation score: cv_score / val_score + epsilon*(1/val_score + 1/cv_score) 768 If higher scoring metric is better, minimize the function result. 769 If lower scoring metric is better, maximize the function result. 
770 """ 771 if scoring == "accuracy": 772 scoring_func = accuracy_score 773 elif scoring == "balanced_accuracy": 774 scoring_func = balanced_accuracy_score 775 elif scoring == "f1": 776 scoring_func = f1_score 777 elif scoring == "roc_auc": 778 scoring_func = roc_auc_score 779 elif scoring == "r2": 780 scoring_func = r2_score 781 elif scoring == "mse": 782 scoring_func = mean_squared_error 783 elif scoring == "mae": 784 scoring_func = mean_absolute_error 785 elif scoring == "mape": 786 scoring_func = mean_absolute_percentage_error 787 elif scoring == "rmse": 788 789 def scoring_func(y_true, y_pred): 790 return np.sqrt(mean_squared_error(y_true, y_pred)) 791 792 X_train, X_val, y_train, y_val = train_test_split( 793 X, y, test_size=0.2, random_state=random_state 794 ) 795 796 res = cross_val_score( 797 self, X_train, y_train, cv=cv, scoring=scoring, n_jobs=n_jobs 798 ) # cross-validation error 799 800 if penalized == False: 801 return res 802 803 DescribeResult = namedtuple( 804 "DescribeResult", ["cv_score", "val_score", "penalized_score"] 805 ) 806 807 numerator = res.mean() 808 809 # Evaluate on the (cv+1)-th fold 810 preds_val = self.fit(X_train, y_train).predict(X_val) 811 try: 812 denominator = scoring(y_val, preds_val) # validation error 813 except Exception as e: 814 denominator = scoring_func(y_val, preds_val) 815 816 # if higher is better 817 if objective == "abs": 818 penalized_score = np.abs(numerator - denominator) + epsilon * ( 819 1 / denominator + 1 / numerator 820 ) 821 elif objective == "relative": 822 ratio = numerator / denominator 823 penalized_score = np.abs(ratio - 1) + epsilon * ( 824 1 / denominator + 1 / numerator 825 ) 826 827 return DescribeResult( 828 cv_score=numerator, 829 val_score=denominator, 830 penalized_score=penalized_score, 831 )
Base model from which all the other classes inherit.
This class contains the most important data preprocessing/feature engineering methods.
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for hidden layer nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or
not (False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original features are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for type_clust='kmeans' or type_clust='gmm'
clustering (could be 0: no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot);
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax') or robust scaling ('robust') or max absolute scaling ('maxabs')
col_sample: float
percentage of features randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
seed: int
reproducibility seed for nodes_sim=='uniform', clustering and dropout
backend: str
"cpu" or "gpu" or "tpu"
def encode_clusters(self, X=None, predict=False, scaler=None, **kwargs):
    """Create new covariates with kmeans or GMM clustering

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features.
            When None, the stored `self.X_` is used. A 1-D input is
            treated as a single observation.

        predict: boolean
            is False on training set and True on test set

        scaler: {object} of class StandardScaler, MinMaxScaler, RobustScaler or MaxAbsScaler
            if scaler has already been fitted on training data (online training),
            it can be passed here; when None, `self.clustering_scaler_` is used

        **kwargs:
            additional parameters to be passed to the
            clustering method (training only)

    Returns:

        Clusters' matrix: {array-like} of dtype float16, one-hot encoded
        when `self.cluster_encode` is True, raw cluster labels otherwise

    """

    np.random.seed(self.seed)

    if X is None:
        X = self.X_

    if isinstance(X, pd.DataFrame):
        X = copy.deepcopy(X.values.astype(float))

    if len(X.shape) == 1:
        X = X.reshape(1, -1)

    # An explicitly supplied scaler takes precedence over the stored one;
    # previously the `scaler` argument was accepted but silently ignored.
    clustering_scaler = self.clustering_scaler_ if scaler is None else scaler

    if predict is False:  # encode training set
        # scale input data before clustering; the scaler handed back by
        # mo.scale_covariates is stored for use at prediction time
        self.clustering_scaler_, scaled_X = mo.scale_covariates(
            X, choice=self.type_scaling[2], scaler=clustering_scaler
        )

        self.clustering_obj_, X_clustered = mo.cluster_covariates(
            scaled_X,
            self.n_clusters,
            self.seed,
            type_clust=self.type_clust,
            **kwargs
        )
    else:  # encode test set with the already fitted clustering object
        X_clustered = self.clustering_obj_.predict(clustering_scaler.transform(X))

    # the encoding step is the same for training and test sets
    if self.cluster_encode == True:
        return mo.one_hot_encode(X_clustered, self.n_clusters).astype(np.float16)

    return X_clustered.astype(np.float16)
Create new covariates with kmeans or GMM clustering
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
predict: boolean
is False on training set and True on test set
scaler: {object} of class StandardScaler, MinMaxScaler, RobustScaler or MaxAbsScaler
if scaler has already been fitted on training data (online training), it can be passed here
**kwargs:
additional parameters to be passed to the
clustering method
Returns:
Clusters' matrix, one-hot encoded if cluster_encode is True, otherwise the raw cluster labels: {array-like}
def create_layer(self, scaled_X, W=None):
    """Create hidden layer.

    Parameters:

        scaled_X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features

        W: {array-like}, shape = [n_features, hidden_features]
            if provided, constructs the hidden layer with W; otherwise
            W is simulated according to `self.nodes_sim` and stored
            in `self.W_`

    Returns:

        Hidden layer matrix: {array-like}

    """

    n_inputs = scaled_X.shape[1]
    has_bias = not (self.bias is False)

    if W is None:
        # pick the node simulation routine; anything that is not
        # 'sobol', 'hammersley' or 'uniform' falls back to Halton
        if self.nodes_sim == "sobol":
            simulate_nodes = generate_sobol
        elif self.nodes_sim == "hammersley":
            simulate_nodes = generate_hammersley
        elif self.nodes_sim == "uniform":
            simulate_nodes = generate_uniform
        else:
            simulate_nodes = generate_halton

        # one extra input dimension accounts for the bias column
        self.W_ = simulate_nodes(
            n_dims=(n_inputs + 1) if has_bias else n_inputs,
            n_points=self.n_hidden_features,
            seed=self.seed,
        )
        weights = self.W_
    else:
        # user-supplied weights are used as is (self.W_ is left untouched)
        weights = W

    if has_bias:
        # prepend a column of ones to the covariates
        layer_input = mo.cbind(
            np.ones(scaled_X.shape[0]),
            scaled_X,
            backend=self.backend,
        )
    else:
        assert (
            scaled_X.shape[1] == weights.shape[0]
        ), "check dimensions of covariates X and matrix W"
        layer_input = scaled_X

    activated = self.activation_func(
        mo.safe_sparse_dot(a=layer_input, b=weights, backend=self.backend)
    )

    return mo.dropout(x=activated, drop_prob=self.dropout, seed=self.seed)
Create hidden layer.
Parameters:
scaled_X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features
W: {array-like}, shape = [n_features, hidden_features]
if provided, constructs the hidden layer with W; otherwise computed internally
Returns:
Hidden layer matrix: {array-like}
def cook_training_set(self, y=None, X=None, W=None, **kwargs):
    """Create new hidden features for training set, with hidden layer, center the response.

    Parameters:

        y: array-like, shape = [n_samples]
            Target values; when None, the stored `self.y_` is used

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features;
            when None, the stored `self.X_` is used

        W: {array-like}, shape = [n_features, hidden_features]
            if provided, constructs the hidden layer via W

        **kwargs: additional parameters to be passed to self.encode_clusters

    Returns:

        (response, scaled [direct link + hidden layer] matrix): {tuple}
        The response is centered for regression and returned unchanged
        for classification. When `self.row_sample < 1`, both elements
        are restricted to the subsampled rows.

    """

    if self.n_hidden_features > 0:  # has a hidden layer
        assert (
            len(self.type_scaling) >= 2
        ), "must have len(self.type_scaling) >= 2 when self.n_hidden_features > 0"

    # Covariates: fall back on the stored ones, convert data frames -----
    if X is None:
        X = self.X_
    elif isinstance(X, pd.DataFrame):
        X = copy.deepcopy(X.values.astype(float))

    # Column subsampling -----
    if self.col_sample == 1:
        input_X = X
    else:
        n_features = X.shape[1]
        new_n_features = int(np.ceil(n_features * self.col_sample))
        assert (
            new_n_features >= 1
        ), "check class attribute 'col_sample' and the number of covariates provided for X"
        np.random.seed(self.seed)
        # the chosen columns are stored so the test set can reuse them
        self.index_col_ = np.random.choice(
            range(n_features), size=new_n_features, replace=False
        )
        input_X = X[:, self.index_col_]

    # Optional clustering: append the encoded clusters to the inputs -----
    if self.n_clusters <= 0:
        base_X = input_X
    else:
        base_X = mo.cbind(
            input_X,
            self.encode_clusters(input_X, **kwargs),
            backend=self.backend,
        )

    # Optional hidden layer, with or without direct link -----
    if self.n_hidden_features > 0:
        self.nn_scaler_, scaled_X = mo.scale_covariates(
            base_X, choice=self.type_scaling[1], scaler=self.nn_scaler_
        )
        Phi_X = self.create_layer(scaled_X, W=W)
        Z = (
            mo.cbind(base_X, Phi_X, backend=self.backend)
            if self.direct_link is True
            else Phi_X
        )
    else:  # no hidden layer
        Z = base_X

    # only the scaler is kept; Z is transformed again below because
    # it may be restricted to a subsample of rows first
    self.scaler_, _ = mo.scale_covariates(
        Z, choice=self.type_scaling[0], scaler=self.scaler_
    )

    # Returning model inputs -----
    is_regression = mx.is_factor(y) is False

    # Use the stored response when none is given. This fallback previously
    # existed for regression only; classification returned None or failed.
    target = self.y_ if y is None else y

    if is_regression:
        # center y
        self.y_mean_, response = mo.center_response(target)
    else:
        response = target

    if self.row_sample < 1:  # y is subsampled
        _, p = Z.shape

        self.subsampler_ = SubSampler(
            y=target, row_sample=self.row_sample, seed=self.seed
        )
        self.index_row_ = self.subsampler_.subsample()
        n_row_sample = len(self.index_row_)

        return (
            response[self.index_row_].reshape(n_row_sample),
            self.scaler_.transform(Z[self.index_row_, :].reshape(n_row_sample, p)),
        )

    # y is not subsampled
    return (response, self.scaler_.transform(Z))
Create new hidden features for training set, with hidden layer, center the response.
Parameters:
y: array-like, shape = [n_samples]
Target values
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features
W: {array-like}, shape = [n_features, hidden_features]
if provided, constructs the hidden layer via W
Returns:
(centered response, direct link + hidden layer matrix): {tuple}
def cook_test_set(self, X, **kwargs):
    """Transform data from test set, with hidden layer.

    The test covariates go through the same steps as in
    `cook_training_set`, reusing the objects stored at training time
    (`index_col_`, clustering objects, `nn_scaler_`, `W_`, `scaler_`).

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Test vectors, where n_samples is the number
            of samples and n_features is the number of features;
            a 1-D input is treated as a single observation

        **kwargs: additional parameters to be passed to self.encode_clusters

    Returns:

        Transformed test set : {array-like}
    """

    if isinstance(X, pd.DataFrame):
        X = copy.deepcopy(X.values.astype(float))

    if len(X.shape) == 1:
        X = X.reshape(1, -1)

    # Apply the column subsample chosen at training time on every path,
    # including the one with no clustering and no hidden layer.
    input_X = X if self.col_sample == 1 else X[:, self.index_col_]

    # Same `<= 0` test as cook_training_set, so that a negative
    # n_clusters means "no clustering" at test time too.
    if self.n_clusters <= 0:
        base_X = input_X
    else:
        predicted_clusters = self.encode_clusters(
            X=input_X, predict=True, **kwargs
        )
        base_X = mo.cbind(input_X, predicted_clusters, backend=self.backend)

    if self.n_hidden_features > 0:  # if hidden layer
        scaled_X = self.nn_scaler_.transform(base_X)
        Phi_X = self.create_layer(scaled_X, self.W_)
        # `is True` matches the test used in cook_training_set
        if self.direct_link is True:
            # The direct link uses the unscaled inputs, as at training
            # time: self.scaler_ was fitted on [inputs, hidden layer].
            return self.scaler_.transform(
                mo.cbind(base_X, Phi_X, backend=self.backend)
            )
        return self.scaler_.transform(Phi_X)

    # if no hidden layer
    return self.scaler_.transform(base_X)
Transform data from test set, with hidden layer.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Test vectors, where n_samples is the number
of samples and n_features is the number of features
**kwargs: additional parameters to be passed to self.encode_clusters
Returns:
Transformed test set : {array-like}
class BaseRegressor(Base, RegressorMixin):
    """Random Vector Functional Link Network regression without shrinkage

    Parameters:

        n_hidden_features: int
            number of nodes in the hidden layer

        activation_name: str
            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

        a: float
            hyperparameter for 'prelu' or 'elu' activation function

        nodes_sim: str
            type of simulation for hidden layer nodes: 'sobol', 'hammersley', 'halton',
            'uniform'

        bias: boolean
            indicates if the hidden layer contains a bias term (True) or
            not (False)

        dropout: float
            regularization parameter; (random) percentage of nodes dropped out
            of the training

        direct_link: boolean
            indicates if the original features are included (True) in model's
            fitting or not (False)

        n_clusters: int
            number of clusters for type_clust='kmeans' or type_clust='gmm'
            clustering (could be 0: no clustering)

        cluster_encode: bool
            defines how the variable containing clusters is treated (default is one-hot);
            if `False`, then labels are used, without one-hot encoding

        type_clust: str
            type of clustering method: currently k-means ('kmeans') or Gaussian
            Mixture Model ('gmm')

        type_scaling: a tuple of 3 strings
            scaling methods for inputs, hidden layer, and clustering respectively
            (and when relevant).
            Currently available: standardization ('std') or MinMax scaling ('minmax')
            or robust scaling ('robust') or max absolute scaling ('maxabs')

        col_sample: float
            percentage of features randomly chosen for training

        row_sample: float
            percentage of rows chosen for training, by stratified bootstrapping

        seed: int
            reproducibility seed for nodes_sim=='uniform', clustering and dropout

        backend: str
            "cpu" or "gpu" or "tpu"

    Attributes:

        beta_: vector
            regression coefficients

        GCV_: float
            Generalized Cross-Validation error

    """

    # construct the object -----

    def __init__(
        self,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        col_sample=1,
        row_sample=1,
        seed=123,
        backend="cpu",
    ):
        # every hyperparameter is handed over unchanged to Base
        base_params = dict(
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            col_sample=col_sample,
            row_sample=row_sample,
            seed=seed,
            backend=backend,
        )
        super().__init__(**base_params)

    def fit(self, X, y, **kwargs):
        """Fit BaseRegressor to training data (X, y)

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features

            y: array-like, shape = [n_samples]
                Target values

            **kwargs: additional parameters to be passed to self.cook_training_set

        Returns:

            self: object
        """

        # response and transformed covariates, as prepared by Base
        response, design = self.cook_training_set(y=y, X=X, **kwargs)

        estimates = lmf.beta_Sigma_hat(X=design, y=response, backend=self.backend)

        self.beta_ = estimates["beta_hat"]
        self.GCV_ = estimates["GCV"]

        return self

    def predict(self, X, **kwargs):
        """Predict test data X.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Test vectors, where n_samples is the number
                of samples and n_features is the number of features;
                a 1-D input is treated as a single observation

            **kwargs: additional parameters to be passed to self.cook_test_set

        Returns:

            model predictions: {array-like}
        """

        single_observation = len(X.shape) == 1

        if single_observation:
            # stack the observation on top of a row of ones, so that a
            # two-row matrix is transformed; only the first prediction is kept
            n_features = X.shape[0]
            X = mo.rbind(
                X.reshape(1, n_features),
                np.ones(n_features).reshape(1, n_features),
            )

        predictions = self.y_mean_ + mo.safe_sparse_dot(
            a=self.cook_test_set(X, **kwargs),
            b=self.beta_,
            backend=self.backend,
        )

        return predictions[0] if single_observation else predictions
Random Vector Functional Link Network regression without shrinkage
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for hidden layer nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or
not (False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original features are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for type_clust='kmeans' or type_clust='gmm'
clustering (could be 0: no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot);
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
col_sample: float
percentage of features randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
seed: int
reproducibility seed for nodes_sim=='uniform', clustering and dropout
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
beta_: vector
regression coefficients
GCV_: float
Generalized Cross-Validation error
def fit(self, X, y, **kwargs):
    """Fit BaseRegressor to training data (X, y)

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features

        y: array-like, shape = [n_samples]
            Target values

        **kwargs: additional parameters to be passed to self.cook_training_set

    Returns:

        self: object
    """

    # cook_training_set hands back the response first, the design matrix second
    y_centered, Z_scaled = self.cook_training_set(y=y, X=X, **kwargs)

    estimates = lmf.beta_Sigma_hat(
        X=Z_scaled,
        y=y_centered,
        backend=self.backend,
    )

    # only the coefficients and the GCV error are kept on the estimator
    self.beta_ = estimates["beta_hat"]
    self.GCV_ = estimates["GCV"]

    return self
Fit BaseRegressor to training data (X, y)
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features
y: array-like, shape = [n_samples]
Target values
**kwargs: additional parameters to be passed to self.cook_training_set
Returns:
self: object
def predict(self, X, **kwargs):
    """Predict test data X.

    Parameters:

        X: {array-like}, shape = [n_samples, n_features] or [n_features]
            Test vectors, where n_samples is the number
            of samples and n_features is the number of features;
            a 1-D array is handled as one single observation

        **kwargs: additional parameters to be passed to self.cook_test_set

    Returns:

        model predictions: {array-like}
            (first element only when X is 1-D)
    """

    single_obs = len(X.shape) == 1

    if single_obs:
        n_features = X.shape[0]
        # The lone observation is stacked on top of a row of ones so that
        # a 2-D array is handed to cook_test_set (presumably required by
        # the preprocessing steps -- TODO confirm); only row 0 is returned.
        # The backend is forwarded, as the Bayesian RVFL regressors do.
        X = mo.rbind(
            X.reshape(1, n_features),
            np.ones(n_features).reshape(1, n_features),
            backend=self.backend,
        )

    preds = self.y_mean_ + mo.safe_sparse_dot(
        a=self.cook_test_set(X, **kwargs),
        b=self.beta_,
        backend=self.backend,
    )

    return preds[0] if single_obs else preds
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Test vectors, where n_samples is the number
of samples and n_features is the number of features
**kwargs: additional parameters to be passed to self.cook_test_set
Returns:
model predictions: {array-like}
class BayesianRVFLRegressor(Base, RegressorMixin):
    """Bayesian Random Vector Functional Link Network regression with one prior

    Parameters:

        n_hidden_features: int
            number of nodes in the hidden layer

        activation_name: str
            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

        a: float
            hyperparameter for 'prelu' or 'elu' activation function

        nodes_sim: str
            type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 'uniform'

        bias: boolean
            indicates if the hidden layer contains a bias term (True) or not (False)

        dropout: float
            regularization parameter; (random) percentage of nodes dropped out
            of the training

        direct_link: boolean
            indicates if the original features are included (True) in model's fitting or not (False)

        n_clusters: int
            number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering)

        cluster_encode: bool
            defines how the variable containing clusters is treated (default is one-hot);
            if `False`, then labels are used, without one-hot encoding

        type_clust: str
            type of clustering method: currently k-means ('kmeans') or Gaussian Mixture Model ('gmm')

        type_scaling: a tuple of 3 strings
            scaling methods for inputs, hidden layer, and clustering respectively
            (and when relevant).
            Currently available: standardization ('std') or MinMax scaling ('minmax')

        seed: int
            reproducibility seed for nodes_sim=='uniform'

        s: float
            std. dev. of regression parameters in Bayesian Ridge Regression

        sigma: float
            std. dev. of residuals in Bayesian Ridge Regression

        return_std: boolean
            if True, the covariance of the fitted parameters is stored by
            `fit`, so that uncertainty around predictions can be evaluated

        backend: str
            "cpu" or "gpu" or "tpu"

    Attributes:

        beta_: array-like
            regression's coefficients

        Sigma_: array-like
            covariance of the distribution of fitted parameters

        GCV_: float
            Generalized cross-validation error

        y_mean_: float
            average response

    Examples:

    ```python
    TBD
    ```

    """

    # construct the object -----

    def __init__(
        self,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        seed=123,
        s=0.1,
        sigma=0.05,
        return_std=True,
        backend="cpu",
    ):
        super().__init__(
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            seed=seed,
            backend=backend,
        )
        self.s = s
        self.sigma = sigma
        self.beta_ = None
        self.Sigma_ = None
        self.GCV_ = None
        self.return_std = return_std

    def fit(self, X, y, **kwargs):
        """Fit BayesianRVFLRegressor to training data (X, y).

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            **kwargs: additional parameters to be passed to
                    self.cook_training_set

        Returns:

            self: object

        """

        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

        fit_obj = lmf.beta_Sigma_hat_rvfl(
            X=scaled_Z,
            y=centered_y,
            s=self.s,
            sigma=self.sigma,
            fit_intercept=False,
            return_cov=self.return_std,
            backend=self.backend,
        )

        self.beta_ = fit_obj["beta_hat"]

        # "Sigma_hat" is only read when the covariance was requested
        if self.return_std == True:
            self.Sigma_ = fit_obj["Sigma_hat"]

        self.GCV_ = fit_obj["GCV"]

        return self

    def predict(self, X, return_std=False, **kwargs):
        """Predict test data X.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features] or [n_features]
                Test vectors, where n_samples is the number
                of samples and n_features is the number of features;
                a 1-D array is handled as one single observation.

            return_std: {boolean}, standard dev. is returned or not.
                Applies to this call only: the `return_std` attribute
                given to the constructor is left untouched.

            **kwargs: additional parameters to be passed to
                    self.cook_test_set

        Returns:

            model predictions: {array-like}, or a tuple
            (predictions, standard deviations) when `return_std` is True

        """

        single_obs = len(X.shape) == 1

        if single_obs:  # one observation in the test set only
            n_features = X.shape[0]
            # stacked on a row of ones so that cook_test_set gets a 2-D
            # array; only row 0 of the result is returned
            X = mo.rbind(
                x=X.reshape(1, n_features),
                y=np.ones(n_features).reshape(1, n_features),
                backend=self.backend,
            )

        Z = self.cook_test_set(X, **kwargs)

        # NOTE: self.return_std is deliberately NOT overwritten here.
        # Doing so changed a constructor hyperparameter as a side effect of
        # predicting, which made a later fit() skip refreshing Sigma_.
        if not return_std:
            preds = self.y_mean_ + mo.safe_sparse_dot(
                a=Z,
                b=self.beta_,
                backend=self.backend,
            )
            return preds[0] if single_obs else preds

        # uncertainty around the predictions is required
        pred_obj = lmf.beta_Sigma_hat_rvfl(
            s=self.s,
            sigma=self.sigma,
            X_star=Z,
            return_cov=True,
            beta_hat_=self.beta_,
            Sigma_hat_=self.Sigma_,
            backend=self.backend,
        )

        if single_obs:
            return (
                self.y_mean_ + pred_obj["preds"][0],
                pred_obj["preds_std"][0],
            )

        return (self.y_mean_ + pred_obj["preds"], pred_obj["preds_std"])
Bayesian Random Vector Functional Link Network regression with one prior
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not (False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original features are included (True) in model's fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
seed: int
reproducibility seed for nodes_sim=='uniform'
s: float
std. dev. of regression parameters in Bayesian Ridge Regression
sigma: float
std. dev. of residuals in Bayesian Ridge Regression
return_std: boolean
if True, uncertainty around predictions is evaluated
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
beta_: array-like
regression's coefficients
Sigma_: array-like
covariance of the distribution of fitted parameters
GCV_: float
Generalized cross-validation error
y_mean_: float
average response
Examples:
TBD
def fit(self, X, y, **kwargs):
    """Fit BayesianRVFLRegressor to training data (X, y).

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features.

        y: array-like, shape = [n_samples]
            Target values.

        **kwargs: additional parameters to be passed to
                self.cook_training_set

    Returns:

        self: object

    """

    # cook_training_set hands back the response first, the design matrix second
    y_centered, Z_scaled = self.cook_training_set(y=y, X=X, **kwargs)

    solver_args = dict(
        X=Z_scaled,
        y=y_centered,
        s=self.s,
        sigma=self.sigma,
        fit_intercept=False,
        return_cov=self.return_std,
        backend=self.backend,
    )
    posterior = lmf.beta_Sigma_hat_rvfl(**solver_args)

    self.beta_ = posterior["beta_hat"]

    # the covariance is stored only when it was requested at construction
    if self.return_std == True:
        self.Sigma_ = posterior["Sigma_hat"]

    self.GCV_ = posterior["GCV"]

    return self
Fit BayesianRVFLRegressor to training data (X, y).
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set
Returns:
self: object
def predict(self, X, return_std=False, **kwargs):
    """Predict test data X.

    Parameters:

        X: {array-like}, shape = [n_samples, n_features] or [n_features]
            Test vectors, where n_samples is the number
            of samples and n_features is the number of features;
            a 1-D array is handled as one single observation.

        return_std: {boolean}, standard dev. is returned or not.
            Applies to this call only: the `return_std` attribute
            given to the constructor is left untouched.

        **kwargs: additional parameters to be passed to
                self.cook_test_set

    Returns:

        model predictions: {array-like}, or a tuple
        (predictions, standard deviations) when `return_std` is True

    """

    single_obs = len(X.shape) == 1

    if single_obs:  # one observation in the test set only
        n_features = X.shape[0]
        # stacked on a row of ones so that cook_test_set gets a 2-D
        # array; only row 0 of the result is returned
        X = mo.rbind(
            x=X.reshape(1, n_features),
            y=np.ones(n_features).reshape(1, n_features),
            backend=self.backend,
        )

    Z = self.cook_test_set(X, **kwargs)

    # NOTE: self.return_std is deliberately NOT overwritten here.
    # Doing so changed a constructor hyperparameter as a side effect of
    # predicting, which made a later fit() skip refreshing Sigma_.
    if not return_std:
        preds = self.y_mean_ + mo.safe_sparse_dot(
            a=Z,
            b=self.beta_,
            backend=self.backend,
        )
        return preds[0] if single_obs else preds

    # uncertainty around the predictions is required
    pred_obj = lmf.beta_Sigma_hat_rvfl(
        s=self.s,
        sigma=self.sigma,
        X_star=Z,
        return_cov=True,
        beta_hat_=self.beta_,
        Sigma_hat_=self.Sigma_,
        backend=self.backend,
    )

    if single_obs:
        return (
            self.y_mean_ + pred_obj["preds"][0],
            pred_obj["preds_std"][0],
        )

    return (self.y_mean_ + pred_obj["preds"], pred_obj["preds_std"])
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Test vectors, where n_samples is the number
of samples and n_features is the number of features.
return_std: {boolean}, standard dev. is returned or not
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
class BayesianRVFL2Regressor(Base, RegressorMixin):
    """Bayesian Random Vector Functional Link Network regression with two priors

    Parameters:

        n_hidden_features: int
            number of nodes in the hidden layer

        activation_name: str
            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

        a: float
            hyperparameter for 'prelu' or 'elu' activation function

        nodes_sim: str
            type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 'uniform'

        bias: boolean
            indicates if the hidden layer contains a bias term (True) or not (False)

        dropout: float
            regularization parameter; (random) percentage of nodes dropped out
            of the training

        direct_link: boolean
            indicates if the original features are included (True) in model's fitting or not (False)

        n_clusters: int
            number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering)

        cluster_encode: bool
            defines how the variable containing clusters is treated (default is one-hot);
            if `False`, then labels are used, without one-hot encoding

        type_clust: str
            type of clustering method: currently k-means ('kmeans') or Gaussian Mixture Model ('gmm')

        type_scaling: a tuple of 3 strings
            scaling methods for inputs, hidden layer, and clustering respectively
            (and when relevant).
            Currently available: standardization ('std') or MinMax scaling ('minmax')

        seed: int
            reproducibility seed for nodes_sim=='uniform'

        s1: float
            std. dev. of init. regression parameters in Bayesian Ridge Regression

        s2: float
            std. dev. of augmented regression parameters in Bayesian Ridge Regression

        sigma: float
            std. dev. of residuals in Bayesian Ridge Regression

        return_std: boolean
            if True, the covariance of the fitted parameters is stored by
            `fit`, so that uncertainty around predictions can be evaluated

        backend: str
            "cpu" or "gpu" or "tpu"

    Attributes:

        beta_: array-like
            regression's coefficients

        Sigma_: array-like
            covariance of the distribution of fitted parameters

        GCV_: float
            Generalized cross-validation error

        y_mean_: float
            average response

    Examples:

    ```python
    TBD
    ```

    """

    # construct the object -----

    def __init__(
        self,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=0,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        seed=123,
        s1=0.1,
        s2=0.1,
        sigma=0.05,
        return_std=True,
        backend="cpu",
    ):
        super().__init__(
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            seed=seed,
            backend=backend,
        )

        self.s1 = s1
        self.s2 = s2
        self.sigma = sigma
        self.beta_ = None
        self.Sigma_ = None
        self.GCV_ = None
        self.return_std = return_std

    def fit(self, X, y, **kwargs):
        """Fit BayesianRVFL2Regressor to training data (X, y)

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features

            y: array-like, shape = [n_samples]
                Target values

            **kwargs: additional parameters to be passed to
                    self.cook_training_set

        Returns:

            self: object

        """

        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

        _, p = X.shape  # only the number of input features is needed
        q = self.n_hidden_features

        if self.direct_link == True:
            # NOTE(review): r counts one column per cluster, which looks
            # like it assumes one-hot encoded clusters -- confirm the
            # behavior for cluster_encode=False.
            r = p + self.n_clusters

            # block-diagonal prior covariance: s1^2 on the first r
            # coefficients, s2^2 on the q hidden-layer coefficients
            block11 = (self.s1**2) * np.eye(r)
            block12 = np.zeros((r, q))
            block21 = np.zeros((q, r))
            block22 = (self.s2**2) * np.eye(q)

            Sigma_prior = mo.rbind(
                x=mo.cbind(x=block11, y=block12, backend=self.backend),
                y=mo.cbind(x=block21, y=block22, backend=self.backend),
                backend=self.backend,
            )

        else:
            # no direct link: only the hidden-layer coefficients get a prior
            Sigma_prior = (self.s2**2) * np.eye(q)

        fit_obj = lmf.beta_Sigma_hat_rvfl2(
            X=scaled_Z,
            y=centered_y,
            Sigma=Sigma_prior,
            sigma=self.sigma,
            fit_intercept=False,
            return_cov=self.return_std,
            backend=self.backend,
        )

        self.beta_ = fit_obj["beta_hat"]

        # "Sigma_hat" is only read when the covariance was requested
        if self.return_std == True:
            self.Sigma_ = fit_obj["Sigma_hat"]

        self.GCV_ = fit_obj["GCV"]

        return self

    def predict(self, X, return_std=False, **kwargs):
        """Predict test data X.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features] or [n_features]
                Test vectors, where n_samples is the number
                of samples and n_features is the number of features;
                a 1-D array is handled as one single observation.

            return_std: {boolean}, standard dev. is returned or not.
                Applies to this call only: the `return_std` attribute
                given to the constructor is left untouched.

            **kwargs: additional parameters to be passed to
                    self.cook_test_set

        Returns:

            model predictions: {array-like}, or a tuple
            (predictions, standard deviations) when `return_std` is True

        """

        single_obs = len(X.shape) == 1

        if single_obs:  # one observation in the test set only
            n_features = X.shape[0]
            # stacked on a row of ones so that cook_test_set gets a 2-D
            # array; only row 0 of the result is returned
            X = mo.rbind(
                x=X.reshape(1, n_features),
                y=np.ones(n_features).reshape(1, n_features),
                backend=self.backend,
            )

        Z = self.cook_test_set(X, **kwargs)

        # NOTE: self.return_std is deliberately NOT overwritten here.
        # Doing so changed a constructor hyperparameter as a side effect of
        # predicting, which made a later fit() skip refreshing Sigma_.
        if not return_std:
            preds = self.y_mean_ + mo.safe_sparse_dot(
                Z,
                self.beta_,
                backend=self.backend,
            )
            return preds[0] if single_obs else preds

        # uncertainty around the predictions is required
        pred_obj = lmf.beta_Sigma_hat_rvfl2(
            X_star=Z,
            return_cov=return_std,
            beta_hat_=self.beta_,
            Sigma_hat_=self.Sigma_,
            backend=self.backend,
        )

        if single_obs:
            return (
                self.y_mean_ + pred_obj["preds"][0],
                pred_obj["preds_std"][0],
            )

        return (self.y_mean_ + pred_obj["preds"], pred_obj["preds_std"])
Bayesian Random Vector Functional Link Network regression with two priors
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not (False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original features are included (True) in model's fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
seed: int
reproducibility seed for nodes_sim=='uniform'
s1: float
std. dev. of init. regression parameters in Bayesian Ridge Regression
s2: float
std. dev. of augmented regression parameters in Bayesian Ridge Regression
sigma: float
std. dev. of residuals in Bayesian Ridge Regression
return_std: boolean
if True, uncertainty around predictions is evaluated
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
beta_: array-like
regression's coefficients
Sigma_: array-like
covariance of the distribution of fitted parameters
GCV_: float
Generalized cross-validation error
y_mean_: float
average response
Examples:
TBD
def fit(self, X, y, **kwargs):
    """Fit BayesianRVFL2Regressor to training data (X, y)

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features

        y: array-like, shape = [n_samples]
            Target values

        **kwargs: additional parameters to be passed to
                self.cook_training_set

    Returns:

        self: object

    """

    # cook_training_set hands back the response first, the design matrix second
    y_centered, Z_scaled = self.cook_training_set(y=y, X=X, **kwargs)

    _, n_inputs = X.shape
    n_hidden = self.n_hidden_features

    # prior covariance of the hidden-layer coefficients (used in both cases)
    hidden_prior = (self.s2**2) * np.eye(n_hidden)

    if self.direct_link == True:
        n_direct = n_inputs + self.n_clusters

        # block-diagonal prior: s1^2 on the first n_direct coefficients,
        # s2^2 on the n_hidden hidden-layer coefficients
        upper_rows = mo.cbind(
            x=(self.s1**2) * np.eye(n_direct),
            y=np.zeros((n_direct, n_hidden)),
            backend=self.backend,
        )
        lower_rows = mo.cbind(
            x=np.zeros((n_hidden, n_direct)),
            y=hidden_prior,
            backend=self.backend,
        )
        Sigma_prior = mo.rbind(
            x=upper_rows,
            y=lower_rows,
            backend=self.backend,
        )
    else:
        Sigma_prior = hidden_prior

    posterior = lmf.beta_Sigma_hat_rvfl2(
        X=Z_scaled,
        y=y_centered,
        Sigma=Sigma_prior,
        sigma=self.sigma,
        fit_intercept=False,
        return_cov=self.return_std,
        backend=self.backend,
    )

    self.beta_ = posterior["beta_hat"]

    # the covariance is stored only when it was requested at construction
    if self.return_std == True:
        self.Sigma_ = posterior["Sigma_hat"]

    self.GCV_ = posterior["GCV"]

    return self
Fit BayesianRVFL2Regressor to training data (X, y)
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features
y: array-like, shape = [n_samples]
Target values
**kwargs: additional parameters to be passed to
self.cook_training_set
Returns:
self: object
def predict(self, X, return_std=False, **kwargs):
    """Predict test data X.

    Parameters:

        X: {array-like}, shape = [n_samples, n_features] or [n_features]
            Test vectors, where n_samples is the number
            of samples and n_features is the number of features;
            a 1-D array is handled as one single observation.

        return_std: {boolean}, standard dev. is returned or not.
            Applies to this call only: the `return_std` attribute
            given to the constructor is left untouched.

        **kwargs: additional parameters to be passed to
                self.cook_test_set

    Returns:

        model predictions: {array-like}, or a tuple
        (predictions, standard deviations) when `return_std` is True

    """

    single_obs = len(X.shape) == 1

    if single_obs:  # one observation in the test set only
        n_features = X.shape[0]
        # stacked on a row of ones so that cook_test_set gets a 2-D
        # array; only row 0 of the result is returned
        X = mo.rbind(
            x=X.reshape(1, n_features),
            y=np.ones(n_features).reshape(1, n_features),
            backend=self.backend,
        )

    Z = self.cook_test_set(X, **kwargs)

    # NOTE: self.return_std is deliberately NOT overwritten here.
    # Doing so changed a constructor hyperparameter as a side effect of
    # predicting, which made a later fit() skip refreshing Sigma_.
    if not return_std:
        preds = self.y_mean_ + mo.safe_sparse_dot(
            Z,
            self.beta_,
            backend=self.backend,
        )
        return preds[0] if single_obs else preds

    # uncertainty around the predictions is required
    pred_obj = lmf.beta_Sigma_hat_rvfl2(
        X_star=Z,
        return_cov=return_std,
        beta_hat_=self.beta_,
        Sigma_hat_=self.Sigma_,
        backend=self.backend,
    )

    if single_obs:
        return (
            self.y_mean_ + pred_obj["preds"][0],
            pred_obj["preds_std"][0],
        )

    return (self.y_mean_ + pred_obj["preds"], pred_obj["preds_std"])
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Test vectors, where n_samples is the number
of samples and n_features is the number of features.
return_std: {boolean}, standard dev. is returned or not
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
42class ClassicalMTS(Base): 43 """Multivariate time series (FactorMTS) forecasting with Factor models 44 45 Parameters: 46 47 model: type of model: str. 48 currently, 'VAR', 'VECM', 'ARIMA', 'ETS', 'Theta' 49 50 Attributes: 51 52 df_: data frame 53 the input data frame, in case a data.frame is provided to `fit` 54 55 level_: int 56 level of confidence for prediction intervals (default is 95) 57 58 Examples: 59 See examples/classical_mts_timeseries.py 60 """ 61 62 # construct the object ----- 63 64 def __init__(self, model="VAR"): 65 66 self.model = model 67 if self.model == "VAR": 68 self.obj = VAR 69 elif self.model == "VECM": 70 self.obj = VECM 71 elif self.model == "ARIMA": 72 self.obj = ARIMA 73 elif self.model == "ETS": 74 self.obj = ExponentialSmoothing 75 elif self.model == "Theta": 76 self.obj = ThetaModel 77 else: 78 raise ValueError("model not recognized") 79 self.n_series = None 80 self.replications = None 81 self.mean_ = None 82 self.upper_ = None 83 self.lower_ = None 84 self.output_dates_ = None 85 self.alpha_ = None 86 self.df_ = None 87 self.residuals_ = [] 88 self.sims_ = None 89 self.level_ = None 90 91 def fit(self, X, **kwargs): 92 """Fit FactorMTS model to training data X, with optional regressors xreg 93 94 Parameters: 95 96 X: {array-like}, shape = [n_samples, n_features] 97 Training time series, where n_samples is the number 98 of samples and n_features is the number of features; 99 X must be in increasing order (most recent observations last) 100 101 **kwargs: for now, additional parameters to be passed to for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity) 102 103 Returns: 104 105 self: object 106 """ 107 108 try: 109 self.n_series = X.shape[1] 110 except Exception: 111 self.n_series = 1 112 113 if (isinstance(X, pd.DataFrame) is False) and isinstance( 114 X, pd.Series 115 ) is False: # input data set is a numpy array 116 117 X = pd.DataFrame(X) 118 if self.n_series > 1: 119 self.series_names = ["series" + 
str(i) for i in range(X.shape[1])] 120 else: 121 self.series_names = "series0" 122 123 else: # input data set is a DataFrame or Series with column names 124 125 X_index = None 126 if X.index is not None and len(X.shape) > 1: 127 X_index = X.index 128 X = copy.deepcopy(mo.convert_df_to_numeric(X)) 129 if X_index is not None: 130 try: 131 X.index = X_index 132 except Exception: 133 pass 134 if isinstance(X, pd.DataFrame): 135 self.series_names = X.columns.tolist() 136 else: 137 self.series_names = X.name 138 139 if isinstance(X, pd.DataFrame) or isinstance(X, pd.Series): 140 self.df_ = X 141 X = X.values 142 self.df_.columns = self.series_names 143 self.input_dates = ts.compute_input_dates(self.df_) 144 else: 145 self.df_ = pd.DataFrame(X, columns=self.series_names) 146 147 if self.model == "Theta": 148 self.obj = self.obj(self.df_, **kwargs).fit() 149 else: 150 self.obj = self.obj(X, **kwargs).fit(**kwargs) 151 152 return self 153 154 def predict(self, h=5, level=95, **kwargs): 155 """Forecast all the time series, h steps ahead 156 157 Parameters: 158 159 h: {integer} 160 Forecasting horizon 161 162 **kwargs: additional parameters to be passed to 163 self.cook_test_set 164 165 Returns: 166 167 model predictions for horizon = h: {array-like} 168 169 """ 170 171 self.output_dates_, frequency = ts.compute_output_dates(self.df_, h) 172 173 self.level_ = level 174 175 self.lower_ = None # do not remove (/!\) 176 177 self.upper_ = None # do not remove (/!\) 178 179 self.sims_ = None # do not remove (/!\) 180 181 self.level_ = level 182 183 self.alpha_ = 100 - level 184 185 pi_multiplier = norm.ppf(1 - self.alpha_ / 200) 186 187 # Named tuple for forecast results 188 DescribeResult = namedtuple("DescribeResult", ("mean", "lower", "upper")) 189 190 if self.model == "VAR": 191 mean_forecast, lower_bound, upper_bound = self.obj.forecast_interval( 192 self.obj.endog, steps=h, alpha=self.alpha_ / 100, **kwargs 193 ) 194 195 elif self.model == "VECM": 196 forecast_result = 
self.obj.predict(steps=h) 197 mean_forecast = forecast_result 198 lower_bound, upper_bound = self._compute_confidence_intervals( 199 forecast_result, alpha=self.alpha_ / 100, **kwargs 200 ) 201 202 elif self.model == "ARIMA": 203 forecast_result = self.obj.get_forecast(steps=h) 204 mean_forecast = forecast_result.predicted_mean 205 lower_bound = forecast_result.conf_int()[:, 0] 206 upper_bound = forecast_result.conf_int()[:, 1] 207 208 elif self.model == "ETS": 209 forecast_result = self.obj.forecast(steps=h) 210 residuals = self.obj.resid 211 std_errors = np.std(residuals) 212 mean_forecast = forecast_result 213 lower_bound = forecast_result - pi_multiplier * std_errors 214 upper_bound = forecast_result + pi_multiplier * std_errors 215 216 elif self.model == "Theta": 217 try: 218 mean_forecast = self.obj.forecast(steps=h).values 219 forecast_result = self.obj.prediction_intervals( 220 steps=h, alpha=self.alpha_ / 100, **kwargs 221 ) 222 lower_bound = forecast_result["lower"].values 223 upper_bound = forecast_result["upper"].values 224 except Exception: 225 mean_forecast = self.obj.forecast(steps=h) 226 forecast_result = self.obj.prediction_intervals( 227 steps=h, alpha=self.alpha_ / 100, **kwargs 228 ) 229 lower_bound = forecast_result["lower"] 230 upper_bound = forecast_result["upper"] 231 232 else: 233 234 raise ValueError("model not recognized") 235 236 try: 237 self.mean_ = pd.DataFrame( 238 mean_forecast, 239 columns=self.series_names, 240 index=self.output_dates_, 241 ) 242 self.lower_ = pd.DataFrame( 243 lower_bound, columns=self.series_names, index=self.output_dates_ 244 ) 245 self.upper_ = pd.DataFrame( 246 upper_bound, columns=self.series_names, index=self.output_dates_ 247 ) 248 except Exception: 249 self.mean_ = pd.Series( 250 mean_forecast, name=self.series_names, index=self.output_dates_ 251 ) 252 self.lower_ = pd.Series( 253 lower_bound, name=self.series_names, index=self.output_dates_ 254 ) 255 self.upper_ = pd.Series( 256 upper_bound, 
name=self.series_names, index=self.output_dates_ 257 ) 258 259 return DescribeResult(mean=self.mean_, lower=self.lower_, upper=self.upper_) 260 261 def _compute_confidence_intervals(self, forecast_result, alpha): 262 """ 263 Compute confidence intervals for VECM forecasts. 264 Uses the covariance of residuals to approximate the confidence intervals. 265 """ 266 residuals = self.obj.resid 267 cov_matrix = np.cov(residuals.T) # Covariance matrix of residuals 268 std_errors = np.sqrt(np.diag(cov_matrix)) # Standard errors 269 270 z_value = norm.ppf(1 - alpha / 2) # Z-score for the given alpha level 271 lower_bound = forecast_result - z_value * std_errors 272 upper_bound = forecast_result + z_value * std_errors 273 274 return lower_bound, upper_bound 275 276 def score(self, X, training_index, testing_index, scoring=None, **kwargs): 277 """Train on training_index, score on testing_index.""" 278 279 assert ( 280 bool(set(training_index).intersection(set(testing_index))) == False 281 ), "Non-overlapping 'training_index' and 'testing_index' required" 282 283 # Dimensions 284 try: 285 # multivariate time series 286 n, p = X.shape 287 except: 288 # univariate time series 289 n = X.shape[0] 290 p = 1 291 292 # Training and testing sets 293 if p > 1: 294 X_train = X[training_index, :] 295 X_test = X[testing_index, :] 296 else: 297 X_train = X[training_index] 298 X_test = X[testing_index] 299 300 # Horizon 301 h = len(testing_index) 302 assert ( 303 len(training_index) + h 304 ) <= n, "Please check lengths of training and testing windows" 305 306 # Fit and predict 307 self.fit(X_train, **kwargs) 308 preds = self.predict(h=h, **kwargs) 309 310 if scoring is None: 311 scoring = "neg_root_mean_squared_error" 312 313 # check inputs 314 assert scoring in ( 315 "explained_variance", 316 "neg_mean_absolute_error", 317 "neg_mean_squared_error", 318 "neg_root_mean_squared_error", 319 "neg_mean_squared_log_error", 320 "neg_median_absolute_error", 321 "r2", 322 ), "'scoring' should be in 
('explained_variance', 'neg_mean_absolute_error', \ 323 'neg_mean_squared_error', 'neg_root_mean_squared_error', 'neg_mean_squared_log_error', \ 324 'neg_median_absolute_error', 'r2')" 325 326 scoring_options = { 327 "explained_variance": skm2.explained_variance_score, 328 "neg_mean_absolute_error": skm2.mean_absolute_error, 329 "neg_mean_squared_error": lambda x, y: np.mean((x - y) ** 2), 330 "neg_root_mean_squared_error": lambda x, y: np.sqrt(np.mean((x - y) ** 2)), 331 "neg_mean_squared_log_error": skm2.mean_squared_log_error, 332 "neg_median_absolute_error": skm2.median_absolute_error, 333 "r2": skm2.r2_score, 334 } 335 336 # if p > 1: 337 # return tuple( 338 # [ 339 # scoring_options[scoring]( 340 # X_test[:, i], preds[:, i]#, **kwargs 341 # ) 342 # for i in range(p) 343 # ] 344 # ) 345 # else: 346 return scoring_options[scoring](X_test, preds) 347 348 def plot(self, series=None, type_axis="dates", type_plot="pi"): 349 """Plot time series forecast 350 351 Parameters: 352 353 series: {integer} or {string} 354 series index or name 355 356 """ 357 358 assert all( 359 [ 360 self.mean_ is not None, 361 self.lower_ is not None, 362 self.upper_ is not None, 363 self.output_dates_ is not None, 364 ] 365 ), "model forecasting must be obtained first (with predict)" 366 367 if series is None: 368 assert ( 369 self.n_series == 1 370 ), "please specify series index or name (n_series > 1)" 371 series = 0 372 373 if isinstance(series, str): 374 assert ( 375 series in self.series_names 376 ), f"series {series} doesn't exist in the input dataset" 377 series_idx = self.df_.columns.get_loc(series) 378 else: 379 assert isinstance(series, int) and ( 380 0 <= series < self.n_series 381 ), f"check series index (< {self.n_series})" 382 series_idx = series 383 384 if isinstance(self.df_, pd.DataFrame): 385 y_all = list(self.df_.iloc[:, series_idx]) + list( 386 self.mean_.iloc[:, series_idx] 387 ) 388 y_test = list(self.mean_.iloc[:, series_idx]) 389 else: 390 y_all = 
list(self.df_.values) + list(self.mean_.values) 391 y_test = list(self.mean_.values) 392 n_points_all = len(y_all) 393 n_points_train = self.df_.shape[0] 394 395 if type_axis == "numeric": 396 x_all = [i for i in range(n_points_all)] 397 x_test = [i for i in range(n_points_train, n_points_all)] 398 399 if type_axis == "dates": # use dates 400 x_all = np.concatenate( 401 (self.input_dates.values, self.output_dates_.values), axis=None 402 ) 403 x_test = self.output_dates_.values 404 405 if type_plot == "pi": 406 fig, ax = plt.subplots() 407 ax.plot(x_all, y_all, "-") 408 ax.plot(x_test, y_test, "-", color="orange") 409 try: 410 ax.fill_between( 411 x_test, 412 self.lower_.iloc[:, series_idx], 413 self.upper_.iloc[:, series_idx], 414 alpha=0.2, 415 color="orange", 416 ) 417 except Exception: 418 ax.fill_between( 419 x_test, 420 self.lower_.values, 421 self.upper_.values, 422 alpha=0.2, 423 color="orange", 424 ) 425 if self.replications is None: 426 if self.n_series > 1: 427 plt.title( 428 f"prediction intervals for {series}", 429 loc="left", 430 fontsize=12, 431 fontweight=0, 432 color="black", 433 ) 434 else: 435 plt.title( 436 f"prediction intervals for input time series", 437 loc="left", 438 fontsize=12, 439 fontweight=0, 440 color="black", 441 ) 442 plt.show() 443 else: # self.replications is not None 444 if self.n_series > 1: 445 plt.title( 446 f"prediction intervals for {self.replications} simulations of {series}", 447 loc="left", 448 fontsize=12, 449 fontweight=0, 450 color="black", 451 ) 452 else: 453 plt.title( 454 f"prediction intervals for {self.replications} simulations of input time series", 455 loc="left", 456 fontsize=12, 457 fontweight=0, 458 color="black", 459 ) 460 plt.show() 461 462 if type_plot == "spaghetti": 463 palette = plt.get_cmap("Set1") 464 sims_ix = getsims(self.sims_, series_idx) 465 plt.plot(x_all, y_all, "-") 466 for col_ix in range( 467 sims_ix.shape[1] 468 ): # avoid this when there are thousands of simulations 469 plt.plot( 470 
x_test, 471 sims_ix[:, col_ix], 472 "-", 473 color=palette(col_ix), 474 linewidth=1, 475 alpha=0.9, 476 ) 477 plt.plot(x_all, y_all, "-", color="black") 478 plt.plot(x_test, y_test, "-", color="blue") 479 # Add titles 480 if self.n_series > 1: 481 plt.title( 482 f"{self.replications} simulations of {series}", 483 loc="left", 484 fontsize=12, 485 fontweight=0, 486 color="black", 487 ) 488 else: 489 plt.title( 490 f"{self.replications} simulations of input time series", 491 loc="left", 492 fontsize=12, 493 fontweight=0, 494 color="black", 495 ) 496 plt.xlabel("Time") 497 plt.ylabel("Values") 498 # Show the graph 499 plt.show() 500 501 def cross_val_score( 502 self, 503 X, 504 scoring="root_mean_squared_error", 505 n_jobs=None, 506 verbose=0, 507 xreg=None, 508 initial_window=5, 509 horizon=3, 510 fixed_window=False, 511 show_progress=True, 512 level=95, 513 **kwargs, 514 ): 515 """Evaluate a score by time series cross-validation. 516 517 Parameters: 518 519 X: {array-like, sparse matrix} of shape (n_samples, n_features) 520 The data to fit. 521 522 scoring: str or a function 523 A str in ('root_mean_squared_error', 'mean_squared_error', 'mean_error', 524 'mean_absolute_error', 'mean_error', 'mean_percentage_error', 525 'mean_absolute_percentage_error', 'winkler_score', 'coverage') 526 Or a function defined as 'coverage' and 'winkler_score' in `utils.timeseries` 527 528 n_jobs: int, default=None 529 Number of jobs to run in parallel. 530 531 verbose: int, default=0 532 The verbosity level. 533 534 xreg: array-like, optional (default=None) 535 Additional (external) regressors to be passed to `fit` 536 xreg must be in 'increasing' order (most recent observations last) 537 538 initial_window: int 539 initial number of consecutive values in each training set sample 540 541 horizon: int 542 number of consecutive values in test set sample 543 544 fixed_window: boolean 545 if False, all training samples start at index 0, and the training 546 window's size is increasing. 
547 if True, the training window's size is fixed, and the window is 548 rolling forward 549 550 show_progress: boolean 551 if True, a progress bar is printed 552 553 **kwargs: dict 554 additional parameters to be passed to `fit` and `predict` 555 556 Returns: 557 558 A tuple: descriptive statistics or errors and raw errors 559 560 """ 561 tscv = TimeSeriesSplit() 562 563 tscv_obj = tscv.split( 564 X, 565 initial_window=initial_window, 566 horizon=horizon, 567 fixed_window=fixed_window, 568 ) 569 570 if isinstance(scoring, str): 571 572 assert scoring in ( 573 "root_mean_squared_error", 574 "mean_squared_error", 575 "mean_error", 576 "mean_absolute_error", 577 "mean_percentage_error", 578 "mean_absolute_percentage_error", 579 "winkler_score", 580 "coverage", 581 ), "must have scoring in ('root_mean_squared_error', 'mean_squared_error', 'mean_error', 'mean_absolute_error', 'mean_error', 'mean_percentage_error', 'mean_absolute_percentage_error', 'winkler_score', 'coverage')" 582 583 def err_func(X_test, X_pred, scoring): 584 if (self.replications is not None) or ( 585 self.type_pi == "gaussian" 586 ): # probabilistic 587 if scoring == "winkler_score": 588 return winkler_score(X_pred, X_test, level=level) 589 elif scoring == "coverage": 590 return coverage(X_pred, X_test, level=level) 591 else: 592 return mean_errors( 593 pred=X_pred.mean, actual=X_test, scoring=scoring 594 ) 595 else: # not probabilistic 596 return mean_errors(pred=X_pred, actual=X_test, scoring=scoring) 597 598 else: # isinstance(scoring, str) = False 599 600 err_func = scoring 601 602 errors = [] 603 604 train_indices = [] 605 606 test_indices = [] 607 608 for train_index, test_index in tscv_obj: 609 train_indices.append(train_index) 610 test_indices.append(test_index) 611 612 if show_progress is True: 613 iterator = tqdm(zip(train_indices, test_indices), total=len(train_indices)) 614 else: 615 iterator = zip(train_indices, test_indices) 616 617 for train_index, test_index in iterator: 618 619 if 
verbose == 1: 620 print(f"TRAIN: {train_index}") 621 print(f"TEST: {test_index}") 622 623 if isinstance(X, pd.DataFrame): 624 self.fit(X.iloc[train_index, :], xreg=xreg, **kwargs) 625 X_test = X.iloc[test_index, :] 626 else: 627 self.fit(X[train_index, :], xreg=xreg, **kwargs) 628 X_test = X[test_index, :] 629 X_pred = self.predict(h=int(len(test_index)), level=level, **kwargs) 630 631 errors.append(err_func(X_test, X_pred, scoring)) 632 633 res = np.asarray(errors) 634 635 return res, describe(res)
Multivariate time series forecasting with classical statistical models (ClassicalMTS)
Parameters:
model: type of model: str.
currently, 'VAR', 'VECM', 'ARIMA', 'ETS', 'Theta'
Attributes:
df_: data frame
the input data frame, in case a data.frame is provided to `fit`
level_: int
level of confidence for prediction intervals (default is 95)
Examples: See examples/classical_mts_timeseries.py
def fit(self, X, **kwargs):
    """Fit the underlying time series model to training data X.

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Training time series, where n_samples is the number
            of samples and n_features is the number of features;
            X must be in increasing order (most recent observations last)

        **kwargs: additional parameters forwarded to the constructor of
            the underlying model and, for every model except 'Theta',
            to its `fit` method as well

    Returns:

        self: object
    """

    try:
        self.n_series = X.shape[1]
    except Exception:
        # 1-D input (or no second dimension): a single series
        self.n_series = 1

    if not isinstance(X, (pd.DataFrame, pd.Series)):
        # raw array input: wrap it and generate default series names
        X = pd.DataFrame(X)
        if self.n_series > 1:
            self.series_names = ["series" + str(i) for i in range(X.shape[1])]
        else:
            # kept as a plain string: `predict` uses it as a Series name
            self.series_names = "series0"

    else:  # input data set is a DataFrame or Series with column names

        X_index = None
        if X.index is not None and len(X.shape) > 1:
            X_index = X.index
        X = copy.deepcopy(mo.convert_df_to_numeric(X))
        if X_index is not None:
            try:
                X.index = X_index
            except Exception:
                # best effort: keep the converted index if the original
                # one cannot be restored
                pass
        if isinstance(X, pd.DataFrame):
            self.series_names = X.columns.tolist()
        else:
            self.series_names = X.name

    # pandas refuses a bare string as a column index, so a scalar name
    # must be wrapped in a list before being used as column labels
    names = self.series_names
    column_labels = [names] if isinstance(names, str) else names

    if isinstance(X, (pd.DataFrame, pd.Series)):
        self.df_ = X
        X = X.values
        if isinstance(self.df_, pd.DataFrame):
            self.df_.columns = column_labels
        else:
            self.df_.columns = self.series_names
        self.input_dates = ts.compute_input_dates(self.df_)
    else:
        self.df_ = pd.DataFrame(X, columns=column_labels)

    # NOTE(review): `self.obj` is rebound to the object returned by
    # `.fit()`; presumably it held the model class before this call, in
    # which case fitting the same instance twice would fail -- confirm
    # against `__init__`.
    if self.model == "Theta":
        self.obj = self.obj(self.df_, **kwargs).fit()
    else:
        self.obj = self.obj(X, **kwargs).fit(**kwargs)

    return self
Fit the underlying time series model to training data X
Parameters:
X: {array-like}, shape = [n_samples, n_features] Training time series, where n_samples is the number of samples and n_features is the number of features; X must be in increasing order (most recent observations last)
**kwargs: additional parameters passed to the constructor of the underlying model and, for every model except 'Theta', to its `fit` method
Returns:
self: object
def predict(self, h=5, level=95, **kwargs):
    """Forecast all the time series, h steps ahead.

    Parameters:

        h: {integer}
            Forecasting horizon

        level: {integer}
            Level of confidence for the prediction intervals, in percent

        **kwargs: additional parameters forwarded to the fitted model's
            interval method (`forecast_interval` for 'VAR',
            `prediction_intervals` for 'Theta'); unused by the other
            models

    Returns:

        a `DescribeResult` namedtuple with fields `mean`, `lower` and
        `upper` (pandas objects indexed by the output dates)

    Raises:

        ValueError: if `self.model` is not one of the supported models
    """

    self.output_dates_, _ = ts.compute_output_dates(self.df_, h)

    self.lower_ = None  # do not remove (/!\)

    self.upper_ = None  # do not remove (/!\)

    self.sims_ = None  # do not remove (/!\)

    self.level_ = level

    self.alpha_ = 100 - level

    # two-sided Gaussian quantile for the requested level
    pi_multiplier = norm.ppf(1 - self.alpha_ / 200)

    # Named tuple for forecast results
    DescribeResult = namedtuple("DescribeResult", ("mean", "lower", "upper"))

    if self.model == "VAR":
        mean_forecast, lower_bound, upper_bound = self.obj.forecast_interval(
            self.obj.endog, steps=h, alpha=self.alpha_ / 100, **kwargs
        )

    elif self.model == "VECM":
        mean_forecast = self.obj.predict(steps=h)
        # the helper only accepts `alpha`; forwarding **kwargs to it
        # would raise a TypeError
        lower_bound, upper_bound = self._compute_confidence_intervals(
            mean_forecast, alpha=self.alpha_ / 100
        )

    elif self.model == "ARIMA":
        forecast_result = self.obj.get_forecast(steps=h)
        mean_forecast = forecast_result.predicted_mean
        intervals = forecast_result.conf_int()  # computed once
        lower_bound = intervals[:, 0]
        upper_bound = intervals[:, 1]

    elif self.model == "ETS":
        mean_forecast = self.obj.forecast(steps=h)
        # constant-width band from the standard deviation of the residuals
        std_errors = np.std(self.obj.resid)
        lower_bound = mean_forecast - pi_multiplier * std_errors
        upper_bound = mean_forecast + pi_multiplier * std_errors

    elif self.model == "Theta":
        try:
            mean_forecast = self.obj.forecast(steps=h).values
            forecast_result = self.obj.prediction_intervals(
                steps=h, alpha=self.alpha_ / 100, **kwargs
            )
            lower_bound = forecast_result["lower"].values
            upper_bound = forecast_result["upper"].values
        except Exception:
            # fall back to the raw objects when `.values` is unavailable
            mean_forecast = self.obj.forecast(steps=h)
            forecast_result = self.obj.prediction_intervals(
                steps=h, alpha=self.alpha_ / 100, **kwargs
            )
            lower_bound = forecast_result["lower"]
            upper_bound = forecast_result["upper"]

    else:

        raise ValueError("model not recognized")

    try:
        self.mean_ = pd.DataFrame(
            mean_forecast,
            columns=self.series_names,
            index=self.output_dates_,
        )
        self.lower_ = pd.DataFrame(
            lower_bound, columns=self.series_names, index=self.output_dates_
        )
        self.upper_ = pd.DataFrame(
            upper_bound, columns=self.series_names, index=self.output_dates_
        )
    except Exception:
        # DataFrame construction failed (e.g. a scalar series name):
        # store the forecasts as Series instead
        self.mean_ = pd.Series(
            mean_forecast, name=self.series_names, index=self.output_dates_
        )
        self.lower_ = pd.Series(
            lower_bound, name=self.series_names, index=self.output_dates_
        )
        self.upper_ = pd.Series(
            upper_bound, name=self.series_names, index=self.output_dates_
        )

    return DescribeResult(mean=self.mean_, lower=self.lower_, upper=self.upper_)
Forecast all the time series, h steps ahead
Parameters:
h: {integer} Forecasting horizon
**kwargs: additional parameters to be passed to the fitted model's interval method (`forecast_interval` for 'VAR', `prediction_intervals` for 'Theta')
Returns:
model predictions for horizon = h: a named tuple with fields `mean`, `lower` and `upper`
def score(self, X, training_index, testing_index, scoring=None, **kwargs):
    """Train on training_index, score on testing_index.

    Parameters:

        X: {array-like}, shape = [n_samples, n_features] or [n_samples]
            Time series (numpy array, pandas DataFrame or Series)

        training_index: sequence of int
            Row positions used for fitting

        testing_index: sequence of int
            Row positions used for scoring; its length is the
            forecasting horizon

        scoring: str
            One of 'explained_variance', 'neg_mean_absolute_error',
            'neg_mean_squared_error', 'neg_root_mean_squared_error'
            (default), 'neg_mean_squared_log_error',
            'neg_median_absolute_error', 'r2'.
            Despite the 'neg_' prefix, the value returned is the plain
            (non-negated) metric.

        **kwargs: additional parameters passed to both `fit` and `predict`

    Returns:

        the selected metric computed between the test rows and the
        point forecast

    Raises:

        AssertionError: if the index sets overlap, the windows do not
            fit in X, or `scoring` is not supported
    """

    assert not set(training_index).intersection(
        testing_index
    ), "Non-overlapping 'training_index' and 'testing_index' required"

    if scoring is None:
        scoring = "neg_root_mean_squared_error"

    # metrics delegated to sklearn.metrics, by function name
    sklearn_metric_names = {
        "explained_variance": "explained_variance_score",
        "neg_mean_absolute_error": "mean_absolute_error",
        "neg_mean_squared_log_error": "mean_squared_log_error",
        "neg_median_absolute_error": "median_absolute_error",
        "r2": "r2_score",
    }
    local_metrics = ("neg_mean_squared_error", "neg_root_mean_squared_error")

    # check inputs before paying for a model fit
    assert scoring in sklearn_metric_names or scoring in local_metrics, (
        "'scoring' should be in ('explained_variance', "
        "'neg_mean_absolute_error', 'neg_mean_squared_error', "
        "'neg_root_mean_squared_error', 'neg_mean_squared_log_error', "
        "'neg_median_absolute_error', 'r2')"
    )

    # Dimensions
    try:
        # multivariate time series
        n, p = X.shape
    except ValueError:
        # univariate time series (1-D shape cannot be unpacked in two)
        n = X.shape[0]
        p = 1

    # Training and testing sets (positional indexing for pandas inputs)
    rows = X.iloc if isinstance(X, (pd.DataFrame, pd.Series)) else X
    if p > 1:
        X_train = rows[training_index, :]
        X_test = rows[testing_index, :]
    else:
        X_train = rows[training_index]
        X_test = rows[testing_index]

    # Horizon
    h = len(testing_index)
    assert (
        len(training_index) + h
    ) <= n, "Please check lengths of training and testing windows"

    # Fit and predict
    self.fit(X_train, **kwargs)
    preds = self.predict(h=h, **kwargs)

    # `predict` returns a (mean, lower, upper) named tuple: only the point
    # forecast is scored. Plain arrays avoid pandas index alignment.
    y_true = np.asarray(X_test)
    y_pred = np.asarray(preds.mean)
    if y_pred.shape != y_true.shape and y_pred.size == y_true.size:
        # e.g. an (h, 1) test set against a flat (h,) forecast
        y_pred = y_pred.reshape(y_true.shape)

    if scoring == "neg_mean_squared_error":
        return np.mean((y_true - y_pred) ** 2)

    if scoring == "neg_root_mean_squared_error":
        return np.sqrt(np.mean((y_true - y_pred) ** 2))

    return getattr(skm2, sklearn_metric_names[scoring])(y_true, y_pred)
Train on training_index, score on testing_index.
class CustomClassifier(Custom, ClassifierMixin):
    """Custom Classification model

    Attributes:

        obj: object
            any object containing a method fit (obj.fit()) and a method predict
            (obj.predict())

        n_hidden_features: int
            number of nodes in the hidden layer

        activation_name: str
            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

        a: float
            hyperparameter for 'prelu' or 'elu' activation function

        nodes_sim: str
            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
            'uniform'

        bias: boolean
            indicates if the hidden layer contains a bias term (True) or not
            (False)

        dropout: float
            regularization parameter; (random) percentage of nodes dropped out
            of the training

        direct_link: boolean
            indicates if the original predictors are included (True) in model's
            fitting or not (False)

        n_clusters: int
            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
                no clustering)

        cluster_encode: bool
            defines how the variable containing clusters is treated (default is one-hot)
            if `False`, then labels are used, without one-hot encoding

        type_clust: str
            type of clustering method: currently k-means ('kmeans') or Gaussian
            Mixture Model ('gmm')

        type_scaling: a tuple of 3 strings
            scaling methods for inputs, hidden layer, and clustering respectively
            (and when relevant).
            Currently available: standardization ('std') or MinMax scaling ('minmax')

        col_sample: float
            percentage of covariates randomly chosen for training

        row_sample: float
            percentage of rows chosen for training, by stratified bootstrapping

        cv_calibration: int, cross-validation generator, or iterable, default=2
            Determines the cross-validation splitting strategy. Same as
            `sklearn.calibration.CalibratedClassifierCV`.
            If `None`, `obj` is used as is, without calibration

        calibration_method: str
            {'sigmoid', 'isotonic'}, default='sigmoid'
            The method to use for calibration. Same as
            `sklearn.calibration.CalibratedClassifierCV`

        seed: int
            reproducibility seed for nodes_sim=='uniform'

        backend: str
            "cpu" or "gpu" or "tpu"

    Examples:

    Note: it's better to use the `DeepClassifier` or `LazyDeepClassifier` classes directly

    ```python
    import nnetsauce as ns
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.model_selection import train_test_split
    from sklearn.datasets import load_digits
    from time import time

    digits = load_digits()
    X = digits.data
    y = digits.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                        random_state=123)

    # layer 1 (base layer) ----
    layer1_regr = RandomForestClassifier(n_estimators=10, random_state=123)

    start = time()

    layer1_regr.fit(X_train, y_train)

    # Accuracy in layer 1
    print(layer1_regr.score(X_test, y_test))

    # layer 2 using layer 1 ----
    layer2_regr = ns.CustomClassifier(obj = layer1_regr, n_hidden_features=5,
                            direct_link=True, bias=True,
                            nodes_sim='uniform', activation_name='relu',
                            n_clusters=2, seed=123)
    layer2_regr.fit(X_train, y_train)

    # Accuracy in layer 2
    print(layer2_regr.score(X_test, y_test))

    # layer 3 using layer 2 ----
    layer3_regr = ns.CustomClassifier(obj = layer2_regr, n_hidden_features=10,
                            direct_link=True, bias=True, dropout=0.7,
                            nodes_sim='uniform', activation_name='relu',
                            n_clusters=2, seed=123)
    layer3_regr.fit(X_train, y_train)

    # Accuracy in layer 3
    print(layer3_regr.score(X_test, y_test))

    print(f"Elapsed {time() - start}")
    ```

    """

    # construct the object -----

    def __init__(
        self,
        obj,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        col_sample=1,
        row_sample=1,
        cv_calibration=2,
        calibration_method="sigmoid",
        seed=123,
        backend="cpu",
    ):
        super().__init__(
            obj=obj,
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            col_sample=col_sample,
            row_sample=row_sample,
            seed=seed,
            backend=backend,
        )
        self.coef_ = None
        self.intercept_ = None
        self.type_fit = "classification"
        self.cv_calibration = cv_calibration
        self.calibration_method = calibration_method
        if self.cv_calibration is not None:
            # wrap the base learner so that its probabilities are calibrated
            self.obj = CalibratedClassifierCV(
                self.obj,
                cv=self.cv_calibration,
                method=self.calibration_method,
            )
        self._estimator_type = "classifier"

    def __sklearn_clone__(self):
        """Create a clone of the estimator.

        This is required for scikit-learn's calibration system to work properly.

        Returns:

            a new, unfitted `CustomClassifier` built with the same parameters
        """
        base_obj = self.obj
        if self.cv_calibration is not None and isinstance(
            base_obj, CalibratedClassifierCV
        ):
            # `__init__` wraps `obj` in a CalibratedClassifierCV; undo that
            # single layer here, otherwise every clone would nest one more
            # calibration wrapper around the base learner.
            inner = getattr(base_obj, "estimator", None)
            if inner is None:
                # attribute name used by older scikit-learn releases
                legacy = getattr(base_obj, "base_estimator", None)
                if legacy is not None and not isinstance(legacy, str):
                    inner = legacy
            if inner is not None:
                base_obj = inner

        return CustomClassifier(
            obj=base_obj,
            n_hidden_features=self.n_hidden_features,
            activation_name=self.activation_name,
            a=self.a,
            nodes_sim=self.nodes_sim,
            bias=self.bias,
            dropout=self.dropout,
            direct_link=self.direct_link,
            n_clusters=self.n_clusters,
            cluster_encode=self.cluster_encode,
            type_clust=self.type_clust,
            type_scaling=self.type_scaling,
            col_sample=self.col_sample,
            row_sample=self.row_sample,
            cv_calibration=self.cv_calibration,
            calibration_method=self.calibration_method,
            seed=self.seed,
            backend=self.backend,
        )

    def fit(self, X, y, sample_weight=None, **kwargs):
        """Fit custom model to training data (X, y).

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            sample_weight: array-like, shape = [n_samples]
                Sample weights.

            **kwargs: additional parameters to be passed to
                self.cook_training_set

        Returns:

            self: object
        """

        if len(X.shape) == 1:
            # a single observation: treat it as one row
            X = np.asarray(X).reshape(1, -1)

        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
        self.classes_ = np.unique(y)  # for compatibility with sklearn
        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn

        if sample_weight is not None:
            # weights must follow the rows retained by cook_training_set
            self.obj.fit(
                scaled_Z,
                output_y,
                sample_weight=sample_weight[self.index_row_].ravel(),
            )
        else:
            self.obj.fit(scaled_Z, output_y)

        # expose linear-model attributes whenever the learner provides them
        # (previously skipped when sample_weight was given)
        if hasattr(self.obj, "coef_"):
            self.coef_ = self.obj.coef_

        if hasattr(self.obj, "intercept_"):
            self.intercept_ = self.obj.intercept_

        return self

    def partial_fit(self, X, y, sample_weight=None, **kwargs):
        """Partial fit custom model to training data (X, y).

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Subset of training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Subset of target values.

            sample_weight: array-like, shape = [n_samples]
                Sample weights.

            **kwargs: additional parameters to be passed to
                self.cook_training_set

        Returns:

            self: object

        Raises:

            NotImplementedError: if `self.obj.partial_fit` is missing or fails
        """

        if len(X.shape) == 1:
            # a single observation: one row, one integer label
            X = np.asarray(X).reshape(1, -1)
            y = np.array([y], dtype=int)

        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

        try:
            if sample_weight is not None:
                # weights must follow the rows retained by cook_training_set
                self.obj.partial_fit(
                    scaled_Z,
                    output_y,
                    sample_weight=sample_weight[self.index_row_].ravel(),
                )
            else:
                self.obj.partial_fit(scaled_Z, output_y)
        except Exception as err:
            # the weighted path used to evaluate `NotImplementedError`
            # without raising it, silently hiding the failure
            raise NotImplementedError(
                "partial_fit failed or is not available for the underlying model"
            ) from err

        self.classes_ = np.unique(y)  # for compatibility with sklearn
        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn

        return self

    def _apply_to_test_set(self, method_name, X, **kwargs):
        """Call `self.obj.<method_name>` on the cooked test set (1-D X allowed)."""
        method = getattr(self.obj, method_name)

        if len(X.shape) == 1:
            n_features = X.shape[0]
            # the single observation is stacked on a dummy row of ones so
            # that a 2-row matrix is cooked; only the first row is returned
            new_X = mo.rbind(
                X.reshape(1, n_features),
                np.ones(n_features).reshape(1, n_features),
            )
            return (method(self.cook_test_set(new_X, **kwargs), **kwargs))[0]

        return method(self.cook_test_set(X, **kwargs), **kwargs)

    def predict(self, X, **kwargs):
        """Predict test data X.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Test vectors, where n_samples is the number
                of samples and n_features is the number of features.

            **kwargs: additional parameters to be passed to
                self.cook_test_set and self.obj.predict

        Returns:

            model predictions: {array-like}
        """
        return self._apply_to_test_set("predict", X, **kwargs)

    def predict_proba(self, X, **kwargs):
        """Predict probabilities for test data X.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Test vectors, where n_samples is the number
                of samples and n_features is the number of features.

            **kwargs: additional parameters to be passed to
                self.cook_test_set and self.obj.predict_proba

        Returns:

            probability estimates for test data: {array-like}
        """
        return self._apply_to_test_set("predict_proba", X, **kwargs)

    def decision_function(self, X, **kwargs):
        """Compute the decision function of X.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Samples to compute decision function for.

            **kwargs: additional parameters to be passed to
                self.cook_test_set and the underlying method

        Returns:

            array-like of shape (n_samples,) or (n_samples, n_classes)
            Decision function of the input samples. The order of outputs is the same
            as that of the classes passed to fit.
        """
        if not hasattr(self.obj, "decision_function"):
            # If base classifier doesn't have decision_function, use predict_proba
            proba = self.predict_proba(X, **kwargs)
            if np.ndim(proba) == 1:
                # single observation: `proba` is one row of class probabilities
                return proba[1] if len(proba) == 2 else proba
            if proba.shape[1] == 2:
                return proba[:, 1]  # For binary classification
            return proba  # For multiclass

        return self._apply_to_test_set("decision_function", X, **kwargs)

    def score(self, X, y, scoring=None):
        """Scoring function for classification.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Test vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            scoring: str
                scoring method (default is accuracy); one of 'accuracy',
                'f1', 'precision', 'recall', 'roc_auc', 'log_loss',
                'balanced_accuracy', 'average_precision',
                'neg_brier_score', 'neg_log_loss'

        Returns:

            score: float

        Raises:

            ValueError: if `scoring` is not one of the supported names
        """

        if scoring is None:
            scoring = "accuracy"

        # metrics computed from predicted labels
        label_metrics = {
            "accuracy": "accuracy_score",
            "f1": "f1_score",
            "precision": "precision_score",
            "recall": "recall_score",
            "roc_auc": "roc_auc_score",
            "balanced_accuracy": "balanced_accuracy_score",
            "average_precision": "average_precision_score",
        }

        # metrics computed from predicted probabilities: (function, sign)
        proba_metrics = {
            "log_loss": ("log_loss", False),
            "neg_brier_score": ("brier_score_loss", True),
            "neg_log_loss": ("log_loss", True),
        }

        if scoring in label_metrics:
            return getattr(skm2, label_metrics[scoring])(y, self.predict(X))

        if scoring in proba_metrics:
            metric_name, negate = proba_metrics[scoring]
            value = getattr(skm2, metric_name)(y, self.predict_proba(X))
            return -value if negate else value

        # an unknown name used to fall through and return None silently
        raise ValueError(
            f"unknown scoring '{scoring}'; expected one of "
            f"{sorted(list(label_metrics) + list(proba_metrics))}"
        )
Custom Classification model
Attributes:
obj: object
any object containing a method fit (obj.fit()) and a method predict
(obj.predict())
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
col_sample: float
percentage of covariates randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
cv_calibration: int, cross-validation generator, or iterable, default=2
Determines the cross-validation splitting strategy. Same as
`sklearn.calibration.CalibratedClassifierCV`
calibration_method: str
{‘sigmoid’, ‘isotonic’}, default=’sigmoid’
The method to use for calibration. Same as
`sklearn.calibration.CalibratedClassifierCV`
seed: int
reproducibility seed for nodes_sim=='uniform'
backend: str
"cpu" or "gpu" or "tpu"
Examples:
Note: it's better to use the DeepClassifier or LazyDeepClassifier classes directly
import nnetsauce as ns
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_digits
from time import time
digits = load_digits()
X = digits.data
y = digits.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
random_state=123)
# layer 1 (base layer) ----
layer1_regr = RandomForestClassifier(n_estimators=10, random_state=123)
start = time()
layer1_regr.fit(X_train, y_train)
# Accuracy in layer 1
print(layer1_regr.score(X_test, y_test))
# layer 2 using layer 1 ----
layer2_regr = ns.CustomClassifier(obj = layer1_regr, n_hidden_features=5,
direct_link=True, bias=True,
nodes_sim='uniform', activation_name='relu',
n_clusters=2, seed=123)
layer2_regr.fit(X_train, y_train)
# Accuracy in layer 2
print(layer2_regr.score(X_test, y_test))
# layer 3 using layer 2 ----
layer3_regr = ns.CustomClassifier(obj = layer2_regr, n_hidden_features=10,
direct_link=True, bias=True, dropout=0.7,
nodes_sim='uniform', activation_name='relu',
n_clusters=2, seed=123)
layer3_regr.fit(X_train, y_train)
# Accuracy in layer 3
print(layer3_regr.score(X_test, y_test))
print(f"Elapsed {time() - start}")
def fit(self, X, y, sample_weight=None, **kwargs):
    """Fit custom model to training data (X, y).

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features.
            A 1-D input is reshaped into a single-row matrix.

        y: array-like, shape = [n_samples]
            Target values.

        sample_weight: array-like, shape = [n_samples]
            Sample weights. They are indexed with `self.index_row_`
            before being handed to `self.obj.fit`.

        **kwargs: additional parameters to be passed to
            self.cook_training_set

    Returns:

        self: object
    """

    if len(X.shape) == 1:
        # Only a Series or a 1-D array can have a 1-D shape; a DataFrame
        # never does, and a Series has no `.reshape`.
        if isinstance(X, pd.Series):
            X = pd.DataFrame(X.values.reshape(1, -1), columns=X.index)
        else:
            X = X.reshape(1, -1)

    output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

    # for compatibility with sklearn
    self.classes_ = np.unique(y)
    self.n_classes_ = len(self.classes_)

    if sample_weight is None:
        self.obj.fit(scaled_Z, output_y)
    else:
        # np.asarray keeps the indexing positional for lists and pandas
        # objects; .ravel() flattens the extra axis that a `None` index
        # would introduce.
        row_weights = np.asarray(sample_weight)[self.index_row_].ravel()
        self.obj.fit(scaled_Z, output_y, sample_weight=row_weights)

    # Expose the base learner's linear coefficients on both code paths
    # (the weighted path used to return before reaching this point).
    if hasattr(self.obj, "coef_"):
        self.coef_ = self.obj.coef_

    if hasattr(self.obj, "intercept_"):
        self.intercept_ = self.obj.intercept_

    return self
Fit custom model to training data (X, y).
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
sample_weight: array-like, shape = [n_samples]
Sample weights.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
def predict(self, X, **kwargs):
    """Predict test data X.

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Observations to predict; a 1-D input is treated as a single
            observation and a single prediction is returned.

        **kwargs: additional parameters, forwarded to both
            self.cook_test_set and self.obj.predict

    Returns:

        model predictions: {array-like}
    """

    if len(X.shape) != 1:
        return self.obj.predict(self.cook_test_set(X, **kwargs), **kwargs)

    # Single observation: stack it on top of a row of ones so that a
    # two-row matrix goes through the pipeline, then keep the first row.
    # NOTE(review): presumably cook_test_set needs a 2-D input - confirm.
    n_features = X.shape[0]
    padded = mo.rbind(
        X.reshape(1, n_features),
        np.ones(n_features).reshape(1, n_features),
    )
    cooked = self.cook_test_set(padded, **kwargs)
    return self.obj.predict(cooked, **kwargs)[0]
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
def predict_proba(self, X, **kwargs):
    """Predict probabilities for test data X.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Observations to predict; a 1-D input is treated as a single
            observation and a single row of probabilities is returned.

        **kwargs: additional parameters, forwarded to both
            self.cook_test_set and self.obj.predict_proba

    Returns:

        probability estimates for test data: {array-like}
    """

    if len(X.shape) != 1:
        return self.obj.predict_proba(self.cook_test_set(X, **kwargs), **kwargs)

    # Single observation: stack it on top of a row of ones so that a
    # two-row matrix goes through the pipeline, then keep the first row.
    # NOTE(review): presumably cook_test_set needs a 2-D input - confirm.
    n_features = X.shape[0]
    padded = mo.rbind(
        X.reshape(1, n_features),
        np.ones(n_features).reshape(1, n_features),
    )
    cooked = self.cook_test_set(padded, **kwargs)
    return self.obj.predict_proba(cooked, **kwargs)[0]
Predict probabilities for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
probability estimates for test data: {array-like}
def score(self, X, y, scoring=None):
    """Scoring function for classification.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Observations to score.

        y: array-like, shape = [n_samples]
            Target values.

        scoring: str
            scoring method (default is accuracy). 'accuracy', 'f1',
            'precision', 'recall', 'roc_auc', 'balanced_accuracy' and
            'average_precision' are computed on `self.predict(X)`;
            'log_loss', 'neg_log_loss' and 'neg_brier_score' are computed
            on `self.predict_proba(X)`. Any other name is resolved with
            `skm2.get_scorer`.

    Returns:

        score: float

    Raises:

        Whatever `skm2.get_scorer` raises for a name it does not know
        (previously such a name silently returned `None`).
    """

    if scoring is None:
        scoring = "accuracy"

    # Metrics evaluated on hard label predictions.
    # NOTE(review): 'roc_auc' and 'average_precision' are fed labels, not
    # scores/probabilities; kept as-is to preserve existing results.
    label_metrics = {
        "accuracy": skm2.accuracy_score,
        "f1": skm2.f1_score,
        "precision": skm2.precision_score,
        "recall": skm2.recall_score,
        "roc_auc": skm2.roc_auc_score,
        "balanced_accuracy": skm2.balanced_accuracy_score,
        "average_precision": skm2.average_precision_score,
    }
    if scoring in label_metrics:
        return label_metrics[scoring](y, self.predict(X))

    # Metrics evaluated on predicted probabilities.
    if scoring == "log_loss":
        return skm2.log_loss(y, self.predict_proba(X))

    if scoring == "neg_brier_score":
        return -skm2.brier_score_loss(y, self.predict_proba(X))

    if scoring == "neg_log_loss":
        return -skm2.log_loss(y, self.predict_proba(X))

    # Anything else: defer to the scorer registry instead of returning None.
    return skm2.get_scorer(scoring)(self, X, y)
Scoring function for classification.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
scoring: str
scoring method (default is accuracy)
Returns:
score: float
class CustomRegressor(Custom, RegressorMixin):
    """Custom Regression model

    This class is used to 'augment' any regression model with transformed features.

    Parameters:

        obj: object
            any object containing a method fit (obj.fit()) and a method predict
            (obj.predict())

        n_hidden_features: int
            number of nodes in the hidden layer

        activation_name: str
            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

        a: float
            hyperparameter for 'prelu' or 'elu' activation function

        nodes_sim: str
            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
            'uniform'

        bias: boolean
            indicates if the hidden layer contains a bias term (True) or not
            (False)

        dropout: float
            regularization parameter; (random) percentage of nodes dropped out
            of the training

        direct_link: boolean
            indicates if the original predictors are included (True) in model's
            fitting or not (False)

        n_clusters: int
            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
                no clustering)

        cluster_encode: bool
            defines how the variable containing clusters is treated (default is one-hot)
            if `False`, then labels are used, without one-hot encoding

        type_clust: str
            type of clustering method: currently k-means ('kmeans') or Gaussian
            Mixture Model ('gmm')

        type_scaling: a tuple of 3 strings
            scaling methods for inputs, hidden layer, and clustering respectively
            (and when relevant).
            Currently available: standardization ('std') or MinMax scaling ('minmax')

        type_pi: str.
            type of prediction interval; currently `None` (split or local
            conformal without simulation), "kde" or "bootstrap" (simulated split
            conformal).

        replications: int.
            number of replications (if needed) for predictive simulation.
            Used only in `self.predict`, for `self.kernel` in ('gaussian',
            'tophat') and `self.type_pi = 'kde'`. Default is `None`.

        kernel: str.
            the kernel to use for kernel density estimation (used for predictive
            simulation in `self.predict`, with `method='splitconformal'` and
            `type_pi = 'kde'`). Currently, either 'gaussian' or 'tophat'.

        type_split: str.
            Type of splitting for conformal prediction. None (default), or
            "random" (random split of data) or "sequential" (sequential split of data)

        col_sample: float
            percentage of covariates randomly chosen for training

        row_sample: float
            percentage of rows chosen for training, by stratified bootstrapping

        level: float
            confidence level for prediction intervals

        pi_method: str
            method for prediction intervals: 'splitconformal' or 'localconformal'

        seed: int
            reproducibility seed for nodes_sim=='uniform'

        type_fit: str
            'regression'

        backend: str
            "cpu" or "gpu" or "tpu"

    Examples:

    See [https://thierrymoudiki.github.io/blog/2024/03/18/python/conformal-and-bayesian-regression](https://thierrymoudiki.github.io/blog/2024/03/18/python/conformal-and-bayesian-regression)

    """

    # construct the object -----

    def __init__(
        self,
        obj,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        type_pi=None,
        replications=None,
        kernel=None,
        type_split=None,
        col_sample=1,
        row_sample=1,
        level=None,
        pi_method=None,
        seed=123,
        backend="cpu",
    ):
        super().__init__(
            obj=obj,
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            col_sample=col_sample,
            row_sample=row_sample,
            seed=seed,
            backend=backend,
        )

        self.type_fit = "regression"
        self.type_pi = type_pi
        self.replications = replications
        self.kernel = kernel
        self.type_split = type_split
        self.level = level
        self.pi_method = pi_method
        self.coef_ = None
        self.intercept_ = None
        # Training data kept by fit/partial_fit; predict(return_pi=True)
        # refits a PredictionInterval on it.
        self.X_ = None
        self.y_ = None

    def fit(self, X, y, sample_weight=None, **kwargs):
        """Fit custom model to training data (X, y).

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            sample_weight: array-like, shape = [n_samples]
                Sample weights. They are indexed with `self.index_row_`
                before being handed to `self.obj.fit`.

            **kwargs: additional parameters to be passed to
                self.cook_training_set and self.obj.fit

        Returns:

            self: object

        """

        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

        if self.level is not None:
            # NOTE(review): this wraps self.obj on every call, so refitting
            # nests PredictionInterval wrappers - confirm this is intended.
            self.obj = PredictionInterval(
                obj=self.obj, method=self.pi_method, level=self.level
            )

        if sample_weight is None:
            self.obj.fit(scaled_Z, centered_y, **kwargs)
        else:
            # np.asarray keeps the indexing positional for lists and pandas
            # objects; .ravel() flattens the extra axis that a `None` index
            # would introduce.
            row_weights = np.asarray(sample_weight)[self.index_row_].ravel()
            self.obj.fit(
                scaled_Z, centered_y, sample_weight=row_weights, **kwargs
            )

        # Recorded on both code paths: the weighted path used to return
        # early, leaving X_/y_ unset for predict(return_pi=True).
        self.X_ = X
        self.y_ = y

        if hasattr(self.obj, "coef_"):
            self.coef_ = self.obj.coef_

        if hasattr(self.obj, "intercept_"):
            self.intercept_ = self.obj.intercept_

        return self

    def partial_fit(self, X, y, **kwargs):
        """Partial fit custom model to training data (X, y).

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Subset of training vectors, where n_samples is the number
                of samples and n_features is the number of features.
                A 1-D input is reshaped into a single-row matrix.

            y: array-like, shape = [n_samples]
                Subset of target values.

            **kwargs: additional parameters to be passed to
                self.cook_training_set and self.obj.partial_fit

        Returns:

            self: object

        """

        if len(X.shape) == 1:
            # Only a Series or a 1-D array can have a 1-D shape; a
            # DataFrame never does, and a Series has no `.reshape`.
            if isinstance(X, pd.Series):
                X = pd.DataFrame(X.values.reshape(1, -1), columns=X.index)
            else:
                X = X.reshape(1, -1)
            y = np.array([y])

        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

        self.obj.partial_fit(scaled_Z, centered_y, **kwargs)

        self.X_ = X
        self.y_ = y

        return self

    def predict(self, X, level=95, method=None, **kwargs):
        """Predict test data X.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Observations to predict; a 1-D input is treated as a
                single observation.

            level: int
                Level of confidence (default = 95)

            method: str
                `None`, or 'splitconformal', 'localconformal'
                prediction (if you specify `return_pi = True`)

            **kwargs: additional parameters
                `return_pi = True` for conformal prediction,
                with `method` in ('splitconformal', 'localconformal')
                or `return_std = True` for `self.obj` in
                (`sklearn.linear_model.BayesianRidge`,
                `sklearn.linear_model.ARDRegression`,
                `sklearn.gaussian_process.GaussianProcessRegressor`).
                Only the presence of these keys is tested, not their value.

        Returns:

            model predictions:
                an array if uncertainty quantification is not requested;
                a namedtuple (mean, std, lower, upper) if `return_std` is
                given; whatever `PredictionInterval.predict` returns if
                `return_pi` is given.

        """

        if "return_std" in kwargs:

            # Two-sided Gaussian quantile for the requested level.
            alpha = 100 - level
            pi_multiplier = norm.ppf(1 - alpha / 200)

            if len(X.shape) == 1:
                # Single observation: stacked on a row of ones, first row
                # kept. NOTE(review): presumably cook_test_set needs a 2-D
                # input - confirm.
                n_features = X.shape[0]
                new_X = mo.rbind(
                    X.reshape(1, n_features),
                    np.ones(n_features).reshape(1, n_features),
                )
                mean_, std_ = self.obj.predict(
                    self.cook_test_set(new_X, **kwargs), return_std=True
                )
                # Unpack first, then take row 0 of each: indexing the
                # (mean, std) tuple with [0] would unpack two means.
                mean_, std_ = mean_[0], std_[0]
            else:
                mean_, std_ = self.obj.predict(
                    self.cook_test_set(X, **kwargs), return_std=True
                )

            preds = self.y_mean_ + mean_
            lower = self.y_mean_ + (mean_ - pi_multiplier * std_)
            upper = self.y_mean_ + (mean_ + pi_multiplier * std_)

            DescribeResults = namedtuple(
                "DescribeResults", ["mean", "std", "lower", "upper"]
            )

            return DescribeResults(preds, std_, lower, upper)

        if "return_pi" in kwargs:
            assert method in (
                "splitconformal",
                "localconformal",
            ), "method must be in ('splitconformal', 'localconformal')"
            self.pi = PredictionInterval(
                obj=self,
                method=method,
                level=level,
                type_pi=self.type_pi,
                replications=self.replications,
                kernel=self.kernel,
            )

            if len(self.X_.shape) == 1:
                # The stored training data decides the reshape, not the
                # X being predicted.
                if isinstance(self.X_, pd.Series):
                    self.X_ = pd.DataFrame(
                        self.X_.values.reshape(1, -1), columns=self.X_.index
                    )
                else:
                    self.X_ = self.X_.reshape(1, -1)
                self.y_ = np.array([self.y_])

            # X_ / y_ are deliberately kept after this call: they cost
            # memory, but dropping them would be a surprising side effect.
            self.pi.fit(self.X_, self.y_)
            return self.pi.predict(X, return_pi=True)

        # plain point predictions
        if len(X.shape) == 1:

            n_features = X.shape[0]
            new_X = mo.rbind(
                X.reshape(1, n_features),
                np.ones(n_features).reshape(1, n_features),
            )

            return (
                self.y_mean_
                + self.obj.predict(self.cook_test_set(new_X, **kwargs), **kwargs)
            )[0]

        return self.y_mean_ + self.obj.predict(
            self.cook_test_set(X, **kwargs), **kwargs
        )

    def score(self, X, y, scoring=None):
        """Compute the score of the model.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Observations to score.

            y: array-like, shape = [n_samples]
                Target values.

            scoring: str
                scoring method, resolved with `skm2.get_scorer`; when
                `None`, the root mean squared error is returned.

        Returns:

            score: float

        """

        if scoring is None:
            return np.sqrt(np.mean((self.predict(X) - y) ** 2))

        return skm2.get_scorer(scoring)(self, X, y)
Custom Regression model
This class is used to 'augment' any regression model with transformed features.
Parameters:
obj: object
any object containing a method fit (obj.fit()) and a method predict
(obj.predict())
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
type_pi: str.
type of prediction interval; currently `None` (split or local
conformal without simulation), "kde" or "bootstrap" (simulated split
conformal).
replications: int.
number of replications (if needed) for predictive simulation.
Used only in `self.predict`, for `self.kernel` in ('gaussian',
'tophat') and `self.type_pi = 'kde'`. Default is `None`.
kernel: str.
the kernel to use for kernel density estimation (used for predictive
simulation in `self.predict`, with `method='splitconformal'` and
`type_pi = 'kde'`). Currently, either 'gaussian' or 'tophat'.
type_split: str.
Type of splitting for conformal prediction. None (default), or
"random" (random split of data) or "sequential" (sequential split of data)
col_sample: float
percentage of covariates randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
level: float
confidence level for prediction intervals
pi_method: str
method for prediction intervals: 'splitconformal' or 'localconformal'
seed: int
reproducibility seed for nodes_sim=='uniform'
type_fit: str
'regression'
backend: str
"cpu" or "gpu" or "tpu"
Examples:
See https://thierrymoudiki.github.io/blog/2024/03/18/python/conformal-and-bayesian-regression
def fit(self, X, y, sample_weight=None, **kwargs):
    """Fit custom model to training data (X, y).

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features.

        y: array-like, shape = [n_samples]
            Target values.

        sample_weight: array-like, shape = [n_samples]
            Sample weights. They are indexed with `self.index_row_`
            before being handed to `self.obj.fit`.

        **kwargs: additional parameters to be passed to
            self.cook_training_set and self.obj.fit

    Returns:

        self: object

    """

    centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

    if self.level is not None:
        # NOTE(review): this wraps self.obj on every call, so refitting
        # nests PredictionInterval wrappers - confirm this is intended.
        self.obj = PredictionInterval(
            obj=self.obj, method=self.pi_method, level=self.level
        )

    if sample_weight is None:
        self.obj.fit(scaled_Z, centered_y, **kwargs)
    else:
        # np.asarray keeps the indexing positional for lists and pandas
        # objects; .ravel() flattens the extra axis that a `None` index
        # would introduce.
        row_weights = np.asarray(sample_weight)[self.index_row_].ravel()
        self.obj.fit(scaled_Z, centered_y, sample_weight=row_weights, **kwargs)

    # Recorded on both code paths: the weighted path used to return early,
    # leaving X_/y_ unset for a later predict(return_pi=True).
    self.X_ = X
    self.y_ = y

    if hasattr(self.obj, "coef_"):
        self.coef_ = self.obj.coef_

    if hasattr(self.obj, "intercept_"):
        self.intercept_ = self.obj.intercept_

    return self
Fit custom model to training data (X, y).
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
sample_weight: array-like, shape = [n_samples]
Sample weights.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
def predict(self, X, level=95, method=None, **kwargs):
    """Predict test data X.

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Observations to predict; a 1-D input is treated as a
            single observation.

        level: int
            Level of confidence (default = 95)

        method: str
            `None`, or 'splitconformal', 'localconformal'
            prediction (if you specify `return_pi = True`)

        **kwargs: additional parameters
            `return_pi = True` for conformal prediction,
            with `method` in ('splitconformal', 'localconformal')
            or `return_std = True` for `self.obj` in
            (`sklearn.linear_model.BayesianRidge`,
            `sklearn.linear_model.ARDRegression`,
            `sklearn.gaussian_process.GaussianProcessRegressor`).
            Only the presence of these keys is tested, not their value.

    Returns:

        model predictions:
            an array if uncertainty quantification is not requested;
            a namedtuple (mean, std, lower, upper) if `return_std` is
            given; whatever `PredictionInterval.predict` returns if
            `return_pi` is given.

    """

    if "return_std" in kwargs:

        # Two-sided Gaussian quantile for the requested level.
        alpha = 100 - level
        pi_multiplier = norm.ppf(1 - alpha / 200)

        if len(X.shape) == 1:
            # Single observation: stacked on a row of ones, first row kept.
            # NOTE(review): presumably cook_test_set needs a 2-D input -
            # confirm.
            n_features = X.shape[0]
            new_X = mo.rbind(
                X.reshape(1, n_features),
                np.ones(n_features).reshape(1, n_features),
            )
            mean_, std_ = self.obj.predict(
                self.cook_test_set(new_X, **kwargs), return_std=True
            )
            # Unpack first, then take row 0 of each: indexing the
            # (mean, std) tuple with [0] would unpack two means.
            mean_, std_ = mean_[0], std_[0]
        else:
            mean_, std_ = self.obj.predict(
                self.cook_test_set(X, **kwargs), return_std=True
            )

        preds = self.y_mean_ + mean_
        lower = self.y_mean_ + (mean_ - pi_multiplier * std_)
        upper = self.y_mean_ + (mean_ + pi_multiplier * std_)

        DescribeResults = namedtuple(
            "DescribeResults", ["mean", "std", "lower", "upper"]
        )

        return DescribeResults(preds, std_, lower, upper)

    if "return_pi" in kwargs:
        assert method in (
            "splitconformal",
            "localconformal",
        ), "method must be in ('splitconformal', 'localconformal')"
        self.pi = PredictionInterval(
            obj=self,
            method=method,
            level=level,
            type_pi=self.type_pi,
            replications=self.replications,
            kernel=self.kernel,
        )

        if len(self.X_.shape) == 1:
            # The stored training data decides the reshape, not the X
            # being predicted.
            if isinstance(self.X_, pd.Series):
                self.X_ = pd.DataFrame(
                    self.X_.values.reshape(1, -1), columns=self.X_.index
                )
            else:
                self.X_ = self.X_.reshape(1, -1)
            self.y_ = np.array([self.y_])

        # X_ / y_ are deliberately kept after this call: they cost memory,
        # but dropping them would be a surprising side effect.
        self.pi.fit(self.X_, self.y_)
        return self.pi.predict(X, return_pi=True)

    # plain point predictions
    if len(X.shape) == 1:

        n_features = X.shape[0]
        new_X = mo.rbind(
            X.reshape(1, n_features),
            np.ones(n_features).reshape(1, n_features),
        )

        return (
            self.y_mean_
            + self.obj.predict(self.cook_test_set(new_X, **kwargs), **kwargs)
        )[0]

    return self.y_mean_ + self.obj.predict(
        self.cook_test_set(X, **kwargs), **kwargs
    )
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
level: int
Level of confidence (default = 95)
method: str
`None`, or 'splitconformal', 'localconformal'
prediction (if you specify `return_pi = True`)
**kwargs: additional parameters
`return_pi = True` for conformal prediction,
with `method` in ('splitconformal', 'localconformal')
or `return_std = True` for `self.obj` in
(`sklearn.linear_model.BayesianRidge`,
`sklearn.linear_model.ARDRegression`,
`sklearn.gaussian_process.GaussianProcessRegressor`)
Returns:
model predictions:
an array if uncertainty quantification is not requested,
or a tuple if with prediction intervals and simulations
if `return_std = True` (mean, standard deviation,
lower and upper prediction interval) or `return_pi = True`
()
def score(self, X, y, scoring=None):
    """Compute the score of the model.

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Observations to score.

        y: array-like, shape = [n_samples]
            Target values.

        scoring: str
            scoring method, resolved with `skm2.get_scorer`; when `None`,
            the root mean squared error is returned.

    Returns:

        score: float

    """

    if scoring is not None:
        scorer = skm2.get_scorer(scoring)
        return scorer(self, X, y)

    # default: root mean squared error of the point predictions
    residuals = self.predict(X) - y
    return np.sqrt(np.mean(residuals ** 2))
Compute the score of the model.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
scoring: str
scoring method
Returns:
score: float
class DeepClassifier(CustomClassifier, ClassifierMixin):
    """
    Deep Classifier

    Stacks `n_layers` nested `CustomClassifier` layers on top of a base
    learner `obj`; the resulting stack is stored in `stacked_obj`.

    Parameters:

        obj: an object
            A base learner, see also https://www.researchgate.net/publication/380701207_Deep_Quasi-Randomized_neural_Networks_for_classification

        n_layers: int (default=3)
            Number of layers. `n_layers = 1` is a simple `CustomClassifier`

        verbose : int, optional (default=0)
            Monitor progress when fitting.

        All the other parameters are nnetsauce `CustomClassifier`'s

    Examples:

        ```python
        import nnetsauce as ns
        from sklearn.datasets import load_breast_cancer
        from sklearn.model_selection import train_test_split
        from sklearn.linear_model import LogisticRegressionCV
        data = load_breast_cancer()
        X = data.data
        y= data.target
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123)
        obj = LogisticRegressionCV()
        clf = ns.DeepClassifier(obj)
        clf.fit(X_train, y_train)
        print(clf.score(X_test, y_test))
        ```
    """

    def __init__(
        self,
        obj,
        # Defining depth
        n_layers=3,
        verbose=0,
        # CustomClassifier attributes
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        col_sample=1,
        row_sample=1,
        cv_calibration=2,
        calibration_method="sigmoid",
        seed=123,
        backend="cpu",
    ):
        super().__init__(
            obj=obj,
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            col_sample=col_sample,
            row_sample=row_sample,
            seed=seed,
            backend=backend,
        )
        assert n_layers >= 1, "must have n_layers >= 1"

        self.coef_ = None
        self.intercept_ = None
        self.type_fit = "classification"
        self.cv_calibration = cv_calibration
        self.calibration_method = calibration_method
        # The base learner is stored as given (no calibration wrapper).
        self.obj = obj
        # Explicitly mark the estimator as a classifier.
        self._estimator_type = "classifier"

        # Before `fit`, the "stack" is just the raw base learner.
        self.stacked_obj = obj
        self.verbose = verbose
        self.n_layers = n_layers
        self.classes_ = None
        self.n_classes_ = None

    def _wrap_layer(self, inner):
        """Return an unfitted `CustomClassifier` layer wrapping `inner`."""
        return CustomClassifier(
            obj=inner,
            n_hidden_features=self.n_hidden_features,
            activation_name=self.activation_name,
            a=self.a,
            nodes_sim=self.nodes_sim,
            bias=self.bias,
            dropout=self.dropout,
            direct_link=self.direct_link,
            n_clusters=self.n_clusters,
            cluster_encode=self.cluster_encode,
            type_clust=self.type_clust,
            type_scaling=self.type_scaling,
            col_sample=self.col_sample,
            row_sample=self.row_sample,
            cv_calibration=None,
            calibration_method=None,
            seed=self.seed,
            backend=self.backend,
        )

    def fit(self, X, y, **kwargs):
        """Fit Classification algorithms to X and y.

        Parameters
        ----------
        X : array-like,
            Training vectors, where rows is the number of samples
            and columns is the number of features.
        y : array-like,
            Target values (class labels), one per sample.
        **kwargs: dict
            Additional parameters to be passed to the fit method
            of the base learner. For example, `sample_weight`.

        Returns
        -------
        A fitted object
        """
        self.classes_ = np.unique(y)
        self.n_classes_ = len(self.classes_)  # for compatibility with scikit-learn

        if isinstance(X, np.ndarray):
            X = pd.DataFrame(X)

        # Always rebuild the stack from the raw base learner: wrapping the
        # previous `stacked_obj` would add `n_layers` more layers on every
        # call to `fit` (e.g. the repeated fits in `lazy_cross_val_optim`).
        stacked = self._wrap_layer(self.obj)

        extra_layers = range(self.n_layers - 1)
        if self.verbose > 0:
            extra_layers = tqdm(extra_layers)

        for _ in extra_layers:
            stacked = deepcopy(self._wrap_layer(stacked))

        self.stacked_obj = stacked
        self.stacked_obj.fit(X, y, **kwargs)

        return self

    def partial_fit(self, X, y, **kwargs):
        """Incrementally update the layers of an already fitted stack.

        Parameters
        ----------
        X : array-like,
            Training vectors, where rows is the number of samples
            and columns is the number of features.
        y : array-like,
            Target values (class labels), one per sample.
        **kwargs: dict
            Additional parameters to be passed to the `partial_fit`
            method of the wrapped learners. For example, `sample_weight`.

        Returns
        -------
        A fitted object
        """
        assert hasattr(self, "stacked_obj"), "model must be fitted first"
        # NOTE(review): each step updates `current_obj.obj` (the layer *below*
        # the current one) and only ValueError is swallowed; confirm this is
        # the intended traversal of the stack.
        current_obj = self.stacked_obj
        for _ in range(self.n_layers):
            try:
                input_X = current_obj.obj.cook_test_set(X)
                current_obj.obj.partial_fit(input_X, y, **kwargs)
                try:
                    current_obj = current_obj.obj
                except AttributeError:
                    pass
            except ValueError:
                pass
        return self

    def predict(self, X):
        """Predict class labels for X using the fitted stack."""
        return self.stacked_obj.predict(X)

    def predict_proba(self, X):
        """Predict class probabilities for X using the fitted stack."""
        return self.stacked_obj.predict_proba(X)

    def score(self, X, y, scoring=None):
        """Score the fitted stack on (X, y); `scoring` is forwarded as-is."""
        return self.stacked_obj.score(X, y, scoring)

    def cross_val_optim(
        self,
        X_train,
        y_train,
        X_test=None,
        y_test=None,
        scoring="accuracy",
        surrogate_obj=None,
        cv=5,
        n_jobs=None,
        n_init=10,
        n_iter=190,
        abs_tol=1e-3,
        verbose=2,
        seed=123,
        **kwargs,
    ):
        """Cross-validation function and hyperparameters' search

        Parameters:

            X_train: array-like,
                Training vectors, where rows is the number of samples
                and columns is the number of features.

            y_train: array-like,
                Training target values, one per sample.

            X_test: array-like,
                Testing vectors, where rows is the number of samples
                and columns is the number of features.

            y_test: array-like,
                Testing target values, one per sample.

            scoring: str
                scoring metric; see https://scikit-learn.org/stable/modules/model_evaluation.html#the-scoring-parameter-defining-model-evaluation-rules

            surrogate_obj: an object;
                An ML model for estimating the uncertainty around the objective function

            cv: int;
                number of cross-validation folds

            n_jobs: int;
                number of jobs for parallel execution

            n_init: an integer;
                number of points in the initial setting, when `x_init` and `y_init` are not provided

            n_iter: an integer;
                number of iterations of the minimization algorithm

            abs_tol: a float;
                tolerance for convergence of the optimizer (early stopping based on acquisition function)

            verbose: int
                controls verbosity

            seed: int
                reproducibility seed

            **kwargs: dict
                additional parameters to be passed to the estimator

        Returns:

            The optimizer's result; when both `X_test` and `y_test` are
            given, a namedtuple with the same fields plus an extra
            `test_<scoring>` field holding the out-of-sample score.
        """

        num_to_activation_name = {1: "relu", 2: "sigmoid", 3: "tanh"}
        num_to_nodes_sim = {1: "sobol", 2: "uniform", 3: "hammersley"}
        num_to_type_clust = {1: "kmeans", 2: "gmm"}

        def as_category(value):
            # The search box starts at 0 but the lookup tables are keyed
            # from 1: ceil() maps (0, 1] to 1, and an exact 0 is clamped to 1
            # instead of raising KeyError.
            return max(1, int(np.ceil(value)))

        def deepclassifier_cv(
            n_layers,
            n_hidden_features,
            activation_name,
            nodes_sim,
            dropout,
            n_clusters,
            type_clust,
        ):
            # Negated mean CV score, because the optimizer minimizes.
            self.set_params(
                **{
                    "n_layers": n_layers,
                    # CustomClassifier attributes
                    "n_hidden_features": n_hidden_features,
                    "activation_name": activation_name,
                    "nodes_sim": nodes_sim,
                    "dropout": dropout,
                    "n_clusters": n_clusters,
                    "type_clust": type_clust,
                    **kwargs,
                }
            )
            return -cross_val_score(
                estimator=self,
                X=X_train,
                y=y_train,
                scoring=scoring,
                cv=cv,
                n_jobs=n_jobs,
                verbose=0,
            ).mean()

        # objective function for hyperparams tuning
        def crossval_objective(xx):
            return deepclassifier_cv(
                n_layers=int(np.ceil(xx[0])),
                n_hidden_features=int(np.ceil(xx[1])),
                activation_name=num_to_activation_name[as_category(xx[2])],
                nodes_sim=num_to_nodes_sim[as_category(xx[3])],
                dropout=xx[4],
                n_clusters=int(np.ceil(xx[5])),
                type_clust=num_to_type_clust[as_category(xx[6])],
            )

        # Settings shared by both optimizer flavours.
        common_opt_args = dict(
            objective_func=crossval_objective,
            lower_bound=np.array([0, 3, 0, 0, 0.0, 0, 0]),
            upper_bound=np.array([5, 100, 3, 3, 0.4, 5, 2]),
            params_names=[
                "n_layers",
                # CustomClassifier attributes
                "n_hidden_features",
                "activation_name",
                "nodes_sim",
                "dropout",
                "n_clusters",
                "type_clust",
            ],
            n_init=n_init,
            n_iter=n_iter,
            seed=seed,
        )

        if surrogate_obj is None:
            gp_opt = gp.GPOpt(method="bayesian", **common_opt_args)
        else:
            gp_opt = gp.GPOpt(
                acquisition="ucb",
                method="splitconformal",
                surrogate_obj=ns.PredictionInterval(
                    obj=surrogate_obj, method="splitconformal"
                ),
                **common_opt_args,
            )

        res = gp_opt.optimize(verbose=verbose, abs_tol=abs_tol)

        # Map the continuous optimum back to the estimator's parameter types.
        best = res.best_params
        best["n_layers"] = int(np.ceil(best["n_layers"]))
        best["n_hidden_features"] = int(np.ceil(best["n_hidden_features"]))
        best["activation_name"] = num_to_activation_name[
            as_category(best["activation_name"])
        ]
        best["nodes_sim"] = num_to_nodes_sim[as_category(best["nodes_sim"])]
        best["n_clusters"] = int(np.ceil(best["n_clusters"]))
        best["type_clust"] = num_to_type_clust[as_category(best["type_clust"])]

        # out-of-sample error
        if X_test is not None and y_test is not None:
            self.set_params(**best, verbose=0, seed=seed)
            preds = self.fit(X_train, y_train).predict(X_test)
            # check error on y_test
            oos_err = getattr(metrics, scoring + "_score")(
                y_true=y_test, y_pred=preds
            )
            result = namedtuple("result", res._fields + ("test_" + scoring,))
            return result(*res, oos_err)

        return res

    def lazy_cross_val_optim(
        self,
        X_train,
        y_train,
        X_test=None,
        y_test=None,
        scoring="accuracy",
        surrogate_objs=None,
        customize=False,
        cv=5,
        n_jobs=None,
        n_init=10,
        n_iter=190,
        abs_tol=1e-3,
        verbose=1,
        seed=123,
    ):
        """Automated Cross-validation function and hyperparameters' search using multiple surrogates

        Parameters:

            X_train: array-like,
                Training vectors, where rows is the number of samples
                and columns is the number of features.

            y_train: array-like,
                Training target values, one per sample.

            X_test: array-like,
                Testing vectors, where rows is the number of samples
                and columns is the number of features.

            y_test: array-like,
                Testing target values, one per sample.

            scoring: str
                scoring metric; see https://scikit-learn.org/stable/modules/model_evaluation.html#the-scoring-parameter-defining-model-evaluation-rules

            surrogate_objs: object names as a list of strings;
                ML models for estimating the uncertainty around the objective function

            customize: boolean
                if True, the surrogate is transformed into a quasi-randomized network (default is False)

            cv: int;
                number of cross-validation folds

            n_jobs: int;
                number of jobs for parallel execution

            n_init: an integer;
                number of points in the initial setting, when `x_init` and `y_init` are not provided

            n_iter: an integer;
                number of iterations of the minimization algorithm

            abs_tol: a float;
                tolerance for convergence of the optimizer (early stopping based on acquisition function)

            verbose: int
                controls verbosity

            seed: int
                reproducibility seed

        Returns:

            A list of `(surrogate label, result of cross_val_optim)` tuples,
            one per surrogate that ran without raising.
        """

        removed_regressors = [
            "TheilSenRegressor",
            "ARDRegression",
            "CCA",
            "GaussianProcessRegressor",
            "GradientBoostingRegressor",
            "HistGradientBoostingRegressor",
            "IsotonicRegression",
            "MultiOutputRegressor",
            "MultiTaskElasticNet",
            "MultiTaskElasticNetCV",
            "MultiTaskLasso",
            "MultiTaskLassoCV",
            "OrthogonalMatchingPursuit",
            "OrthogonalMatchingPursuitCV",
            "PLSCanonical",
            "PLSRegression",
            "RadiusNeighborsRegressor",
            "RegressorChain",
            "StackingRegressor",
            "VotingRegressor",
        ]

        results = []

        for est_name, est_class in all_estimators():
            # Only regressors can serve as surrogates of the objective.
            if not issubclass(est_class, RegressorMixin):
                continue
            if est_name in removed_regressors:
                continue
            if surrogate_objs is not None and est_name not in surrogate_objs:
                continue

            label = f"CustomClassifier({est_name})" if customize else est_name

            try:
                print(f"\n surrogate: {label}")
                if customize:
                    # NOTE(review): this wraps a *regressor* in
                    # CustomClassifier; confirm whether CustomRegressor was
                    # intended for the surrogate.
                    surr_obj = ns.CustomClassifier(obj=est_class())
                else:
                    surr_obj = est_class()
                res = self.cross_val_optim(
                    X_train=X_train,
                    y_train=y_train,
                    X_test=X_test,
                    y_test=y_test,
                    surrogate_obj=surr_obj,
                    cv=cv,
                    n_jobs=n_jobs,
                    scoring=scoring,
                    n_init=n_init,
                    n_iter=n_iter,
                    abs_tol=abs_tol,
                    verbose=verbose,
                    seed=seed,
                )
                print(f"\n result: {res}")
                results.append((label, res))
            except Exception as e:
                # Best effort: a failing surrogate is skipped, but reported
                # instead of being swallowed silently.
                print(f"\n surrogate {label} skipped: {e!r}")

        return results
Deep Classifier
Parameters:
obj: an object
A base learner, see also https://www.researchgate.net/publication/380701207_Deep_Quasi-Randomized_neural_Networks_for_classification
n_layers: int (default=3)
Number of layers. `n_layers = 1` is a simple `CustomClassifier`
verbose : int, optional (default=0)
Monitor progress when fitting.
All the other parameters are nnetsauce `CustomClassifier`'s
Examples:
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegressionCV
data = load_breast_cancer()
X = data.data
y= data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123)
obj = LogisticRegressionCV()
clf = ns.DeepClassifier(obj)
clf.fit(X_train, y_train)
print(clf.score(X_test, y_test))
def fit(self, X, y, **kwargs):
    """Fit Classification algorithms to X and y.

    Parameters
    ----------
    X : array-like,
        Training vectors, where rows is the number of samples
        and columns is the number of features.
    y : array-like,
        Target values (class labels), one per sample.
    **kwargs: dict
        Additional parameters to be passed to the fit method
        of the base learner. For example, `sample_weight`.

    Returns
    -------
    A fitted object
    """
    self.classes_ = np.unique(y)
    self.n_classes_ = len(self.classes_)  # for compatibility with scikit-learn

    if isinstance(X, np.ndarray):
        X = pd.DataFrame(X)

    def wrap_layer(inner):
        # One unfitted CustomClassifier layer around `inner`.
        return CustomClassifier(
            obj=inner,
            n_hidden_features=self.n_hidden_features,
            activation_name=self.activation_name,
            a=self.a,
            nodes_sim=self.nodes_sim,
            bias=self.bias,
            dropout=self.dropout,
            direct_link=self.direct_link,
            n_clusters=self.n_clusters,
            cluster_encode=self.cluster_encode,
            type_clust=self.type_clust,
            type_scaling=self.type_scaling,
            col_sample=self.col_sample,
            row_sample=self.row_sample,
            cv_calibration=None,
            calibration_method=None,
            seed=self.seed,
            backend=self.backend,
        )

    # Always rebuild the stack from the raw base learner: wrapping the
    # previous `stacked_obj` would add `n_layers` more layers on every
    # call to `fit`.
    stacked = wrap_layer(self.obj)

    extra_layers = range(self.n_layers - 1)
    if self.verbose > 0:
        extra_layers = tqdm(extra_layers)

    for _ in extra_layers:
        stacked = deepcopy(wrap_layer(stacked))

    self.stacked_obj = stacked
    self.stacked_obj.fit(X, y, **kwargs)

    return self
Fit Classification algorithms to X and y.
Parameters
X : array-like,
Training vectors, where rows is the number of samples
and columns is the number of features.
y : array-like,
Target values (class labels), one per training sample.
**kwargs: dict
Additional parameters to be passed to the fit method
of the base learner. For example, sample_weight.
Returns
A fitted object
def predict(self, X):
    """Predict class labels for X.

    Delegates to the fitted stack of layers stored in `self.stacked_obj`
    and returns its predictions unchanged. Nothing is written to stdout.
    """
    return self.stacked_obj.predict(X)
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Test vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
Predict probabilities for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Test vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
probability estimates for test data: {array-like}
Scoring function for classification.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
scoring: str
scoring method (default is accuracy)
Returns:
score: float
class DeepRegressor(CustomRegressor, RegressorMixin):
    """
    Deep Regressor

    Stacks `n_layers` nested `CustomRegressor` layers on top of a base
    learner `obj`; the resulting stack is stored in `stacked_obj`.

    Parameters:

        obj: an object
            A base learner, see also https://www.researchgate.net/publication/380701207_Deep_Quasi-Randomized_neural_Networks_for_classification

        verbose : int, optional (default=0)
            Monitor progress when fitting.

        n_layers: int (default=3)
            Number of layers. `n_layers = 1` is a simple `CustomRegressor`

        All the other parameters are nnetsauce `CustomRegressor`'s

    Examples:

        ```python
        import nnetsauce as ns
        from sklearn.datasets import load_diabetes
        from sklearn.model_selection import train_test_split
        from sklearn.linear_model import RidgeCV
        data = load_diabetes()
        X = data.data
        y= data.target
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123)
        obj = RidgeCV()
        clf = ns.DeepRegressor(obj)
        clf.fit(X_train, y_train)
        print(clf.score(X_test, y_test))
        ```

    """

    def __init__(
        self,
        obj,
        # Defining depth
        n_layers=3,
        verbose=0,
        # CustomRegressor attributes
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        col_sample=1,
        row_sample=1,
        level=None,
        pi_method="splitconformal",
        seed=123,
        backend="cpu",
    ):
        super().__init__(
            obj=obj,
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            col_sample=col_sample,
            row_sample=row_sample,
            level=level,
            pi_method=pi_method,
            seed=seed,
            backend=backend,
        )

        assert n_layers >= 1, "must have n_layers >= 1"

        # Before `fit`, the "stack" is just a private copy of the base learner.
        self.stacked_obj = deepcopy(obj)
        self.verbose = verbose
        self.n_layers = n_layers
        self.level = level
        self.pi_method = pi_method
        self.coef_ = None

    def _wrap_layer(self, inner):
        """Return an unfitted `CustomRegressor` layer wrapping `inner`."""
        return CustomRegressor(
            obj=inner,
            n_hidden_features=self.n_hidden_features,
            activation_name=self.activation_name,
            a=self.a,
            nodes_sim=self.nodes_sim,
            bias=self.bias,
            dropout=self.dropout,
            direct_link=self.direct_link,
            n_clusters=self.n_clusters,
            cluster_encode=self.cluster_encode,
            type_clust=self.type_clust,
            type_scaling=self.type_scaling,
            col_sample=self.col_sample,
            row_sample=self.row_sample,
            seed=self.seed,
            backend=self.backend,
        )

    def fit(self, X, y, **kwargs):
        """Fit Regression algorithms to X and y.

        Parameters
        ----------
        X : array-like,
            Training vectors, where rows is the number of samples
            and columns is the number of features.
        y : array-like,
            Target values, one per sample.
        **kwargs: dict
            Additional parameters to be passed to the fit method
            of the base learner. For example, `sample_weight`.

        Returns
        -------
        A fitted object
        """

        if isinstance(X, np.ndarray):
            X = pd.DataFrame(X)

        # Always rebuild the stack from a fresh copy of the base learner:
        # wrapping the previous `stacked_obj` would add `n_layers` more
        # layers (or wrap a PredictionInterval) on every call to `fit`.
        stacked = self._wrap_layer(deepcopy(self.obj))

        extra_layers = range(self.n_layers - 1)
        if self.verbose > 0:
            extra_layers = tqdm(extra_layers)

        for _ in extra_layers:
            stacked = deepcopy(self._wrap_layer(stacked))

        self.stacked_obj = stacked
        self.stacked_obj.fit(X, y, **kwargs)

        if self.level is not None:
            # NOTE(review): the stack is wrapped *after* being fitted and the
            # wrapper itself is not fitted here; confirm PredictionInterval
            # can predict in that state.
            self.stacked_obj = PredictionInterval(
                obj=self.stacked_obj, method=self.pi_method, level=self.level
            )

        # Mirror the fitted attributes of the stack on this estimator, when
        # the stack exposes them.
        for attr_name in (
            "clustering_obj_",
            "coef_",
            "scaler_",
            "nn_scaler_",
            "clustering_scaler_",
        ):
            if hasattr(self.stacked_obj, attr_name):
                setattr(self, attr_name, getattr(self.stacked_obj, attr_name))

        return self

    def partial_fit(self, X, y, **kwargs):
        """Incrementally update the layers of an already fitted stack.

        Parameters
        ----------
        X : array-like,
            Training vectors, where rows is the number of samples
            and columns is the number of features.
        y : array-like,
            Target values, one per sample.
        **kwargs: dict
            Additional parameters to be passed to the `partial_fit`
            method of the wrapped learners. For example, `sample_weight`.

        Returns
        -------
        A fitted object
        """
        assert hasattr(self, "stacked_obj"), "model must be fitted first"
        # NOTE(review): each step updates `current_obj.obj` (the layer *below*
        # the current one); a ValueError is printed and the loop continues.
        # Confirm this is the intended traversal of the stack.
        current_obj = self.stacked_obj
        for _ in range(self.n_layers):
            try:
                input_X = current_obj.obj.cook_test_set(X)
                current_obj.obj.partial_fit(input_X, y, **kwargs)
                try:
                    current_obj = current_obj.obj
                except AttributeError:
                    pass
            except ValueError as e:
                print(e)
        return self

    def predict(self, X, **kwargs):
        """Predict with the fitted stack.

        When `level` is set, the stack is asked for prediction intervals
        (`return_pi=True`) and `**kwargs` are not forwarded; otherwise
        `**kwargs` are passed through to the stack's `predict`.
        """
        if self.level is not None:
            return self.stacked_obj.predict(X, return_pi=True)
        return self.stacked_obj.predict(X, **kwargs)

    def score(self, X, y, scoring=None):
        """Score the fitted stack on (X, y); `scoring` is forwarded as-is."""
        return self.stacked_obj.score(X, y, scoring)
Deep Regressor
Parameters:
obj: an object
A base learner, see also https://www.researchgate.net/publication/380701207_Deep_Quasi-Randomized_neural_Networks_for_classification
verbose : int, optional (default=0)
Monitor progress when fitting.
n_layers: int (default=3)
Number of layers. `n_layers = 1` is a simple `CustomRegressor`
All the other parameters are nnetsauce `CustomRegressor`'s
Examples:
import nnetsauce as ns
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.linear_model import RidgeCV
data = load_diabetes()
X = data.data
y= data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123)
obj = RidgeCV()
clf = ns.DeepRegressor(obj)
clf.fit(X_train, y_train)
print(clf.score(X_test, y_test))
def fit(self, X, y, **kwargs):
    """Fit Regression algorithms to X and y.

    Parameters
    ----------
    X : array-like,
        Training vectors, where rows is the number of samples
        and columns is the number of features.
    y : array-like,
        Target values, one per sample.
    **kwargs: dict
        Additional parameters to be passed to the fit method
        of the base learner. For example, `sample_weight`.

    Returns
    -------
    A fitted object
    """

    if isinstance(X, np.ndarray):
        X = pd.DataFrame(X)

    def wrap_layer(inner):
        # One unfitted CustomRegressor layer around `inner`.
        return CustomRegressor(
            obj=inner,
            n_hidden_features=self.n_hidden_features,
            activation_name=self.activation_name,
            a=self.a,
            nodes_sim=self.nodes_sim,
            bias=self.bias,
            dropout=self.dropout,
            direct_link=self.direct_link,
            n_clusters=self.n_clusters,
            cluster_encode=self.cluster_encode,
            type_clust=self.type_clust,
            type_scaling=self.type_scaling,
            col_sample=self.col_sample,
            row_sample=self.row_sample,
            seed=self.seed,
            backend=self.backend,
        )

    # Always rebuild the stack from a fresh copy of the base learner:
    # wrapping the previous `stacked_obj` would add `n_layers` more layers
    # (or wrap a PredictionInterval) on every call to `fit`.
    stacked = wrap_layer(deepcopy(self.obj))

    extra_layers = range(self.n_layers - 1)
    if self.verbose > 0:
        extra_layers = tqdm(extra_layers)

    for _ in extra_layers:
        stacked = deepcopy(wrap_layer(stacked))

    self.stacked_obj = stacked
    self.stacked_obj.fit(X, y, **kwargs)

    if self.level is not None:
        # NOTE(review): the stack is wrapped *after* being fitted and the
        # wrapper itself is not fitted here; confirm PredictionInterval can
        # predict in that state.
        self.stacked_obj = PredictionInterval(
            obj=self.stacked_obj, method=self.pi_method, level=self.level
        )

    # Mirror the fitted attributes of the stack on this estimator, when the
    # stack exposes them.
    for attr_name in (
        "clustering_obj_",
        "coef_",
        "scaler_",
        "nn_scaler_",
        "clustering_scaler_",
    ):
        if hasattr(self.stacked_obj, attr_name):
            setattr(self, attr_name, getattr(self.stacked_obj, attr_name))

    return self
Fit Regression algorithms to X and y.
Parameters
X : array-like,
Training vectors, where rows is the number of samples
and columns is the number of features.
y : array-like,
Target values, one per training sample.
**kwargs: dict
Additional parameters to be passed to the fit method
of the base learner. For example, sample_weight.
Returns
A fitted object
def predict(self, X, **kwargs):
    """Predict with the fitted stack stored in `self.stacked_obj`.

    Without a confidence `level`, `**kwargs` are forwarded to the stack's
    `predict`. With a `level`, the stack is called with `return_pi=True`
    and `**kwargs` are not forwarded.
    """
    if self.level is None:
        return self.stacked_obj.predict(X, **kwargs)
    return self.stacked_obj.predict(X, return_pi=True)
Predict test data X.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
level: int
Level of confidence (default = 95)
method: str
`None`, or 'splitconformal', 'localconformal'
prediction (if you specify `return_pi = True`)
**kwargs: additional parameters
`return_pi = True` for conformal prediction,
with `method` in ('splitconformal', 'localconformal')
or `return_std = True` for `self.obj` in
(`sklearn.linear_model.BayesianRidge`,
`sklearn.linear_model.ARDRegression`,
`sklearn.gaussian_process.GaussianProcessRegressor`)
Returns:
model predictions:
an array if uncertainty quantification is not requested,
or a tuple when it is requested: with `return_std = True`,
(mean, standard deviation, lower and upper prediction
interval); with `return_pi = True`, the prediction
intervals (and simulations, when applicable)
Compute the score of the model.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
scoring: str
scoring method
Returns:
score: float
class DeepMTS(MTS):
    """Univariate and multivariate time series (DeepMTS) forecasting with Quasi-Randomized networks (Work in progress)

    Parameters:

        obj: object.
            any object containing a method fit (obj.fit()) and a method predict
            (obj.predict()).

        n_layers: int.
            number of layers in the neural network.

        n_hidden_features: int.
            number of nodes in the hidden layer.

        activation_name: str.
            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'.

        a: float.
            hyperparameter for 'prelu' or 'elu' activation function.

        nodes_sim: str.
            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
            'uniform'.

        bias: boolean.
            indicates if the hidden layer contains a bias term (True) or not
            (False).

        dropout: float.
            regularization parameter; (random) percentage of nodes dropped out
            of the training.

        direct_link: boolean.
            indicates if the original predictors are included (True) in model's fitting or not (False).

        n_clusters: int.
            number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering).

        cluster_encode: bool.
            defines how the variable containing clusters is treated (default is one-hot)
            if `False`, then labels are used, without one-hot encoding.

        type_clust: str.
            type of clustering method: currently k-means ('kmeans') or Gaussian
            Mixture Model ('gmm').

        type_scaling: a tuple of 3 strings.
            scaling methods for inputs, hidden layer, and clustering respectively
            (and when relevant).
            Currently available: standardization ('std') or MinMax scaling ('minmax').

        lags: int.
            number of lags used for each time series.

        type_pi: str.
            type of prediction interval; currently:
            - "gaussian": simple, fast, but: assumes stationarity of Gaussian in-sample residuals and independence in the multivariate case
            - "kde": based on Kernel Density Estimation of in-sample residuals
            - "bootstrap": based on independent bootstrap of in-sample residuals
            - "block-bootstrap": based on basic block bootstrap of in-sample residuals
            - "scp-kde": Sequential split conformal prediction with Kernel Density Estimation of calibrated residuals
            - "scp-bootstrap": Sequential split conformal prediction with independent bootstrap of calibrated residuals
            - "scp-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of calibrated residuals
            - "scp2-kde": Sequential split conformal prediction with Kernel Density Estimation of standardized calibrated residuals
            - "scp2-bootstrap": Sequential split conformal prediction with independent bootstrap of standardized calibrated residuals
            - "scp2-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of standardized calibrated residuals

        block_size: int.
            size of block for 'type_pi' in ("block-bootstrap", "scp-block-bootstrap", "scp2-block-bootstrap").
            Default is round(3.15*(n_residuals^1/3))

        replications: int.
            number of replications (if needed, for predictive simulation). Default is 'None'.

        kernel: str.
            the kernel to use for residuals density estimation (used for predictive simulation). Currently, either 'gaussian' or 'tophat'.

        agg: str.
            either "mean" or "median" for simulation of bootstrap aggregating

        seed: int.
            reproducibility seed for nodes_sim=='uniform' or predictive simulation.

        backend: str.
            "cpu" or "gpu" or "tpu".

        verbose: int.
            0: not printing; 1: printing

        show_progress: bool.
            True: progress bar when fitting each series; False: no progress bar when fitting each series

    Attributes:

        fit_objs_: dict
            objects adjusted to each individual time series

        y_: {array-like}
            DeepMTS responses (most recent observations first)

        X_: {array-like}
            DeepMTS lags

        xreg_: {array-like}
            external regressors

        y_means_: dict
            a dictionary of each series mean values

        preds_: {array-like}
            successive model predictions

        preds_std_: {array-like}
            standard deviation around the predictions

        return_std_: boolean
            return uncertainty or not (set in predict)

        df_: data frame
            the input data frame, in case a data.frame is provided to `fit`

    Examples:

    Example 1:

        ```python
        import nnetsauce as ns
        import numpy as np
        from sklearn import linear_model
        np.random.seed(123)

        M = np.random.rand(10, 3)
        M[:,0] = 10*M[:,0]
        M[:,2] = 25*M[:,2]
        print(M)

        # Adjust Bayesian Ridge
        regr4 = linear_model.BayesianRidge()
        obj_DeepMTS = ns.DeepMTS(regr4, lags = 1, n_hidden_features=5)
        obj_DeepMTS.fit(M)
        print(obj_DeepMTS.predict())

        # with credible intervals
        print(obj_DeepMTS.predict(return_std=True, level=80))

        print(obj_DeepMTS.predict(return_std=True, level=95))
        ```

    Example 2:

        ```python
        import nnetsauce as ns
        import numpy as np
        import pandas as pd
        from sklearn import linear_model

        dataset = {
        'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'],
        'series1' : [34, 30, 35.6, 33.3, 38.1],
        'series2' : [4, 5.5, 5.6, 6.3, 5.1],
        'series3' : [100, 100.5, 100.6, 100.2, 100.1]}
        df = pd.DataFrame(dataset).set_index('date')
        print(df)

        # Adjust Bayesian Ridge
        regr5 = linear_model.BayesianRidge()
        obj_DeepMTS = ns.DeepMTS(regr5, lags = 1, n_hidden_features=5)
        obj_DeepMTS.fit(df)
        print(obj_DeepMTS.predict())

        # with credible intervals
        print(obj_DeepMTS.predict(return_std=True, level=80))

        print(obj_DeepMTS.predict(return_std=True, level=95))
        ```

    """

    # construct the object -----

    def __init__(
        self,
        obj,
        n_layers=3,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        lags=1,
        type_pi="kde",
        block_size=None,
        replications=None,
        kernel=None,
        agg="mean",
        seed=123,
        backend="cpu",
        verbose=0,
        show_progress=True,
    ):
        assert int(lags) == lags, "parameter 'lags' should be an integer"
        assert n_layers >= 1, "must have n_layers >= 1"
        self.n_layers = int(n_layers)

        # Settings shared by every CustomRegressor layer and by the MTS
        # layer built by the parent constructor.
        layer_settings = dict(
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            seed=seed,
            backend=backend,
        )

        # The parent MTS supplies the last layer, so only `n_layers - 1`
        # CustomRegressor wrappers are added here (none when n_layers == 1).
        learner = obj
        for _ in range(self.n_layers - 1):
            learner = CustomRegressor(obj=deepcopy(learner), **layer_settings)

        self.obj = deepcopy(learner)
        super().__init__(
            obj=self.obj,
            lags=lags,
            type_pi=type_pi,
            block_size=block_size,
            replications=replications,
            kernel=kernel,
            agg=agg,
            verbose=verbose,
            show_progress=show_progress,
            **layer_settings,
        )
Univariate and multivariate time series (DeepMTS) forecasting with Quasi-Randomized networks (Work in progress)
Parameters:
obj: object.
any object containing a method fit (obj.fit()) and a method predict
(obj.predict()).
n_layers: int.
number of layers in the neural network.
n_hidden_features: int.
number of nodes in the hidden layer.
activation_name: str.
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'.
a: float.
hyperparameter for 'prelu' or 'elu' activation function.
nodes_sim: str.
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'.
bias: boolean.
indicates if the hidden layer contains a bias term (True) or not
(False).
dropout: float.
regularization parameter; (random) percentage of nodes dropped out
of the training.
direct_link: boolean.
indicates if the original predictors are included (True) in model's fitting or not (False).
n_clusters: int.
number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering).
cluster_encode: bool.
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding.
type_clust: str.
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm').
type_scaling: a tuple of 3 strings.
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax').
lags: int.
number of lags used for each time series.
type_pi: str.
type of prediction interval; currently:
- "gaussian": simple, fast, but: assumes stationarity of Gaussian in-sample residuals and independence in the multivariate case
- "kde": based on Kernel Density Estimation of in-sample residuals
- "bootstrap": based on independent bootstrap of in-sample residuals
- "block-bootstrap": based on basic block bootstrap of in-sample residuals
- "scp-kde": Sequential split conformal prediction with Kernel Density Estimation of calibrated residuals
- "scp-bootstrap": Sequential split conformal prediction with independent bootstrap of calibrated residuals
- "scp-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of calibrated residuals
- "scp2-kde": Sequential split conformal prediction with Kernel Density Estimation of standardized calibrated residuals
- "scp2-bootstrap": Sequential split conformal prediction with independent bootstrap of standardized calibrated residuals
- "scp2-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of standardized calibrated residuals
block_size: int.
size of block for 'type_pi' in ("block-bootstrap", "scp-block-bootstrap", "scp2-block-bootstrap").
Default is round(3.15*(n_residuals^(1/3)))
replications: int.
number of replications (if needed, for predictive simulation). Default is 'None'.
kernel: str.
the kernel to use for residuals density estimation (used for predictive simulation). Currently, either 'gaussian' or 'tophat'.
agg: str.
either "mean" or "median" for simulation of bootstrap aggregating
seed: int.
reproducibility seed for nodes_sim=='uniform' or predictive simulation.
backend: str.
"cpu" or "gpu" or "tpu".
verbose: int.
0: not printing; 1: printing
show_progress: bool.
True: progress bar when fitting each series; False: no progress bar when fitting each series
Attributes:
fit_objs_: dict
objects adjusted to each individual time series
y_: {array-like}
DeepMTS responses (most recent observations first)
X_: {array-like}
DeepMTS lags
xreg_: {array-like}
external regressors
y_means_: dict
a dictionary of each series mean values
preds_: {array-like}
successive model predictions
preds_std_: {array-like}
standard deviation around the predictions
return_std_: boolean
return uncertainty or not (set in predict)
df_: data frame
the input data frame, in case a data.frame is provided to `fit`
Examples:
Example 1:
import nnetsauce as ns
import numpy as np
from sklearn import linear_model
np.random.seed(123)
M = np.random.rand(10, 3)
M[:,0] = 10*M[:,0]
M[:,2] = 25*M[:,2]
print(M)
# Adjust Bayesian Ridge
regr4 = linear_model.BayesianRidge()
obj_DeepMTS = ns.DeepMTS(regr4, lags = 1, n_hidden_features=5)
obj_DeepMTS.fit(M)
print(obj_DeepMTS.predict())
# with credible intervals
print(obj_DeepMTS.predict(return_std=True, level=80))
print(obj_DeepMTS.predict(return_std=True, level=95))
Example 2:
import nnetsauce as ns
import numpy as np
import pandas as pd
from sklearn import linear_model
dataset = {
'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'],
'series1' : [34, 30, 35.6, 33.3, 38.1],
'series2' : [4, 5.5, 5.6, 6.3, 5.1],
'series3' : [100, 100.5, 100.6, 100.2, 100.1]}
df = pd.DataFrame(dataset).set_index('date')
print(df)
# Adjust Bayesian Ridge
regr5 = linear_model.BayesianRidge()
obj_DeepMTS = ns.DeepMTS(regr5, lags = 1, n_hidden_features=5)
obj_DeepMTS.fit(df)
print(obj_DeepMTS.predict())
# with credible intervals
print(obj_DeepMTS.predict(return_std=True, level=80))
print(obj_DeepMTS.predict(return_std=True, level=95))
class Downloader:
    """Download datasets from data sources (R-universe for now)

    Attributes:

        pkgname: str
            package name given to the last call to `download`

        dataset: str
            dataset name given to the last call to `download`

        source: str
            base URL given to the last call to `download`

        url: str
            full URL queried by the last call to `download`

        request: object
            response returned by `requests.get` for the last call

    Every attribute is `None` until `download` has been called.

    """

    def __init__(self):
        self.pkgname = None
        self.dataset = None
        self.source = None
        self.url = None
        self.request = None

    def download(
        self,
        pkgname="MASS",
        dataset="Boston",
        source="https://cran.r-universe.dev/",
        **kwargs
    ):
        """Download datasets from data sources (R-universe for now)

        Args:

            pkgname: str
                name of the package containing the dataset

            dataset: str
                name of the dataset

            source: str
                base URL of the data source, with or without a trailing '/'

            **kwargs: additional parameters to be passed to `pd.DataFrame`

        Returns:

            a data frame built from the JSON payload of the response

        Raises:

            requests.HTTPError: if the server answers with an error status

        Examples:

        ```python
        import nnetsauce as ns

        downloader = ns.Downloader()
        df = downloader.download(pkgname="MASS", dataset="Boston")
        ```

        """
        self.pkgname = pkgname
        self.dataset = dataset
        self.source = source

        # Accept a base URL given without its trailing slash.
        base = source if source.endswith("/") else source + "/"
        self.url = base + pkgname + "/data/" + dataset + "/json"

        self.request = requests.get(self.url)
        # Surface HTTP errors explicitly instead of trying to parse an
        # error page as JSON.
        self.request.raise_for_status()

        return pd.DataFrame(self.request.json(), **kwargs)
Download datasets from data sources (R-universe for now)
def download(
    self,
    pkgname="MASS",
    dataset="Boston",
    source="https://cran.r-universe.dev/",
    **kwargs
):
    """Download datasets from data sources (R-universe for now)

    Args:

        pkgname: str
            name of the package containing the dataset

        dataset: str
            name of the dataset

        source: str
            base URL of the data source, with or without a trailing '/'

        **kwargs: additional parameters to be passed to `pd.DataFrame`

    Returns:

        a data frame built from the JSON payload of the response

    Raises:

        requests.HTTPError: if the server answers with an error status

    Examples:

    ```python
    import nnetsauce as ns

    downloader = ns.Downloader()
    df = downloader.download(pkgname="MASS", dataset="Boston")
    ```

    """
    self.pkgname = pkgname
    self.dataset = dataset
    self.source = source

    # Accept a base URL given without its trailing slash.
    base = source if source.endswith("/") else source + "/"
    self.url = base + pkgname + "/data/" + dataset + "/json"

    self.request = requests.get(self.url)
    # Surface HTTP errors explicitly instead of trying to parse an
    # error page as JSON.
    self.request.raise_for_status()

    return pd.DataFrame(self.request.json(), **kwargs)
Download datasets from data sources (R-universe for now)
Examples:
import nnetsauce as ns
downloader = ns.Downloader()
df = downloader.download(pkgname="MASS", dataset="Boston")
class GLMClassifier(GLM, ClassifierMixin):
    """Generalized 'linear' models using quasi-randomized networks (classification)

    Parameters:

        n_hidden_features: int
            number of nodes in the hidden layer

        lambda1: float
            regularization parameter for GLM coefficients on original features

        alpha1: float
            controls compromise between l1 and l2 norm of GLM coefficients on original features

        lambda2: float
            regularization parameter for GLM coefficients on nonlinear features

        alpha2: float
            controls compromise between l1 and l2 norm of GLM coefficients on nonlinear features

        family: str
            "logit", "expit" or "erf"; selects the loss used in `fit` and the
            transformation applied in `predict_proba`

        activation_name: str
            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

        a: float
            hyperparameter for 'prelu' or 'elu' activation function

        nodes_sim: str
            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
            'uniform'

        bias: boolean
            indicates if the hidden layer contains a bias term (True) or not
            (False)

        dropout: float
            regularization parameter; (random) percentage of nodes dropped out
            of the training

        direct_link: boolean
            indicates if the original predictors are included (True) in model's
            fitting or not (False)

        n_clusters: int
            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
                no clustering)

        cluster_encode: bool
            defines how the variable containing clusters is treated (default is one-hot)
            if `False`, then labels are used, without one-hot encoding

        type_clust: str
            type of clustering method: currently k-means ('kmeans') or Gaussian
            Mixture Model ('gmm')

        type_scaling: a tuple of 3 strings
            scaling methods for inputs, hidden layer, and clustering respectively
            (and when relevant).
            Currently available: standardization ('std') or MinMax scaling ('minmax')

        optimizer: object
            optimizer, from class nnetsauce.Optimizer. The default instance is
            created once, when the class is defined, and is therefore shared by
            every estimator built without an explicit optimizer.

        seed: int
            reproducibility seed for nodes_sim=='uniform'

    Attributes:

        beta_: vector
            regression coefficients

        classes_: array
            distinct values found in `y` by `fit`

        n_classes_: int
            number of distinct values found in `y` by `fit`

    Examples:

    See [https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_classification.py)

    """

    # construct the object -----

    def __init__(
        self,
        n_hidden_features=5,
        lambda1=0.01,
        alpha1=0.5,
        lambda2=0.01,
        alpha2=0.5,
        family="expit",
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        optimizer=Optimizer(),
        seed=123,
    ):
        super().__init__(
            n_hidden_features=n_hidden_features,
            lambda1=lambda1,
            alpha1=alpha1,
            lambda2=lambda2,
            alpha2=alpha2,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            optimizer=optimizer,
            seed=seed,
        )

        self.family = family

    def logit_loss(self, Y, row_index, XB):
        """Loss used for family "logit".

        Caps `XB` at 709.0 **in place**, then returns the negative mean of
        `sum(Y * XB, axis=1) - logsumexp(XB)` (`logsumexp` is called without
        an axis argument). Also sets `self.n_classes` to `Y.shape[1]`.

        Args:

            Y: one-hot encoded responses

            row_index: rows of `Y` matching the rows of `XB`; `None` means
                all rows of `Y`

            XB: linear predictor, modified in place by the capping

        Returns:

            loss: float

        """
        self.n_classes = Y.shape[1]

        # 709.0 is just under the largest argument for which exp() remains
        # finite in double precision.
        XB[XB > 709.0] = 709.0

        targets = Y if row_index is None else Y[row_index, :]

        return -np.mean(np.sum(targets * XB, axis=1) - logsumexp(XB))

    def expit_erf_loss(self, Y, row_index, XB):
        """Loss used for families "expit" and "erf".

        Same expression as `logit_loss`, without the capping of `XB`.
        Also sets `self.n_classes` to `Y.shape[1]`.

        Args:

            Y: one-hot encoded responses

            row_index: rows of `Y` matching the rows of `XB`; `None` means
                all rows of `Y`

            XB: linear predictor

        Returns:

            loss: float

        """
        self.n_classes = Y.shape[1]

        targets = Y if row_index is None else Y[row_index, :]

        return -np.mean(np.sum(targets * XB, axis=1) - logsumexp(XB))

    def loss_func(
        self, beta, group_index, X, Y, y, row_index=None, type_loss="logit", **kwargs
    ):
        """Penalized objective minimized by the optimizer.

        Args:

            beta: flat coefficient vector; reshaped column-major to
                (X.shape[1], self.n_classes) before computing the predictor

            group_index: forwarded to `self.compute_penalty`

            X: design matrix

            Y: one-hot encoded responses

            y: raw responses; only its length is used, to build the default
                `row_index`

            row_index: rows on which the loss is evaluated (default: all)

            type_loss: str
                "logit", "expit" or "erf"

            **kwargs: accepted for interface compatibility, not used here

        Returns:

            loss plus penalty: float

        """
        losses = {
            "logit": self.logit_loss,
            "expit": self.expit_erf_loss,
            "erf": self.expit_erf_loss,
        }

        beta_matrix = np.reshape(beta, (X.shape[1], self.n_classes), order="F")

        if row_index is None:
            row_index = range(len(y))
            XB = self.compute_XB(X, beta=beta_matrix)
        else:
            XB = self.compute_XB(X, beta=beta_matrix, row_index=row_index)

        return losses[type_loss](Y, row_index, XB) + self.compute_penalty(
            group_index=group_index, beta=beta
        )

    def fit(self, X, y, **kwargs):
        """Fit GLM model to training data (X, y).

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            **kwargs: additional parameters to be passed to
                    self.cook_training_set or self.obj.fit

        Returns:

            self: object

        """

        assert mx.is_factor(
            y
        ), "y must contain only integers"  # change is_factor and subsampling everywhere

        self.classes_ = np.unique(y)  # for compatibility with sklearn
        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn

        self.beta_ = None

        n_samples, n_features = X.shape

        # NOTE(review): GLMRegressor.fit uses the number of features alone
        # as group_index -- confirm that n_samples * n_features is intended.
        self.group_index = n_samples * n_features

        self.n_classes = len(np.unique(y))

        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

        Y = self.optimizer.one_hot_encode(output_y, self.n_classes)

        # initialization: least-squares solution, one column per class
        beta_ = np.linalg.lstsq(scaled_Z, Y, rcond=None)[0]

        # optimization (coefficients are passed flattened column-major,
        # matching the reshape done in loss_func)
        self.optimizer.fit(
            self.loss_func,
            response=y,
            x0=beta_.flatten(order="F"),
            group_index=self.group_index,
            X=scaled_Z,
            Y=Y,
            y=y,
            type_loss=self.family,
        )

        self.beta_ = self.optimizer.results[0]
        self.classes_ = np.unique(y)

        return self

    def predict(self, X, **kwargs):
        """Predict test data X.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Test vectors, where n_samples is the number
                of samples and n_features is the number of features.

            **kwargs: additional parameters to be passed to
                    self.cook_test_set

        Returns:

            model predictions: {array-like}
                column index of the largest probability on each row of
                `predict_proba`

        """

        return np.argmax(self.predict_proba(X, **kwargs), axis=1)

    def predict_proba(self, X, **kwargs):
        """Predict probabilities for test data X.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Test vectors, where n_samples is the number
                of samples and n_features is the number of features.
                A 1-D array is treated as a single observation.

            **kwargs: additional parameters to be passed to
                    self.cook_test_set

        Returns:

            probability estimates for test data: {array-like}
                one row per observation (a single row for 1-D `X`), each
                row divided by its sum

        Raises:

            ValueError: if `self.family` is not "logit", "expit" or "erf"

        """
        single_observation = len(X.shape) == 1

        if single_observation:
            n_features = X.shape[0]
            # A row of ones is appended so that cook_test_set receives a
            # 2-D input; it is dropped again before returning.
            new_X = mo.rbind(
                X.reshape(1, n_features),
                np.ones(n_features).reshape(1, n_features),
            )
            Z = self.cook_test_set(new_X, **kwargs)
        else:
            Z = self.cook_test_set(X, **kwargs)

        # beta_ is stored flat, class after class; the number of
        # coefficients per class is inferred rather than recomputed from
        # X, which is not possible for 1-D input.
        ZB = mo.safe_sparse_dot(Z, self.beta_.reshape(self.n_classes, -1).T)

        if self.family == "logit":
            unnormalized = np.exp(ZB)
        elif self.family == "expit":
            unnormalized = expit(ZB)
        elif self.family == "erf":
            unnormalized = 0.5 * (1 + erf(ZB))
        else:
            raise ValueError(
                "family must be 'logit', 'expit' or 'erf', got %r" % (self.family,)
            )

        probs = unnormalized / unnormalized.sum(axis=1)[:, None]

        return probs[:1] if single_observation else probs

    def score(self, X, y, scoring=None):
        """Scoring function for classification.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Test vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            scoring: str
                scoring method (default is accuracy)

        Returns:

            score: float
                `None` when `scoring` is not one of the supported names

        """

        if scoring is None:
            scoring = "accuracy"

        # metrics computed from predicted labels
        label_metrics = {
            "accuracy": skm2.accuracy_score,
            "f1": skm2.f1_score,
            "precision": skm2.precision_score,
            "recall": skm2.recall_score,
            "roc_auc": skm2.roc_auc_score,
            "balanced_accuracy": skm2.balanced_accuracy_score,
            "average_precision": skm2.average_precision_score,
        }

        # metrics computed from predicted probabilities: (function, negate)
        proba_metrics = {
            "log_loss": (skm2.log_loss, False),
            "neg_brier_score": (skm2.brier_score_loss, True),
            "neg_log_loss": (skm2.log_loss, True),
        }

        if scoring in label_metrics:
            return label_metrics[scoring](y, self.predict(X))

        if scoring in proba_metrics:
            metric, negate = proba_metrics[scoring]
            value = metric(y, self.predict_proba(X))
            return -value if negate else value

        return None
Generalized 'linear' models using quasi-randomized networks (classification)
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
lambda1: float
regularization parameter for GLM coefficients on original features
alpha1: float
controls compromise between l1 and l2 norm of GLM coefficients on original features
lambda2: float
regularization parameter for GLM coefficients on nonlinear features
alpha2: float
controls compromise between l1 and l2 norm of GLM coefficients on nonlinear features
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
optimizer: object
optimizer, from class Optimizer
seed: int
reproducibility seed for nodes_sim=='uniform'
Attributes:
beta_: vector
regression coefficients
Examples:
See https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_classification.py
def fit(self, X, y, **kwargs):
    """Fit GLM model to training data (X, y).

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features.

        y: array-like, shape = [n_samples]
            Target values.

        **kwargs: additional parameters to be passed to
                self.cook_training_set or self.obj.fit

    Returns:

        self: object

    """

    assert mx.is_factor(
        y
    ), "y must contain only integers"  # change is_factor and subsampling everywhere

    # for compatibility with sklearn
    self.classes_ = np.unique(y)
    self.n_classes_ = len(self.classes_)

    self.beta_ = None

    n_samples, n_features = X.shape

    # NOTE(review): GLMRegressor.fit uses the number of features alone as
    # group_index -- confirm that n_samples * n_features is intended.
    self.group_index = n_samples * n_features

    self.n_classes = len(np.unique(y))

    output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

    Y = self.optimizer.one_hot_encode(output_y, self.n_classes)

    # starting point: least-squares solution, one column per class,
    # flattened column-major to match the reshape done in loss_func
    initial_beta = np.linalg.lstsq(scaled_Z, Y, rcond=None)[0]
    x0 = initial_beta.flatten(order="F")

    self.optimizer.fit(
        self.loss_func,
        response=y,
        x0=x0,
        group_index=self.group_index,
        X=scaled_Z,
        Y=Y,
        y=y,
        type_loss=self.family,
    )

    self.beta_ = self.optimizer.results[0]
    self.classes_ = np.unique(y)

    return self
Fit GLM model to training data (X, y).
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
def predict(self, X, **kwargs):
    """Predict test data X.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Test vectors, where n_samples is the number
            of samples and n_features is the number of features.

        **kwargs: additional parameters to be passed to
                self.cook_test_set

    Returns:

        model predictions: {array-like}
            column index of the largest probability on each row of
            `predict_proba`

    """
    probabilities = self.predict_proba(X, **kwargs)

    return np.argmax(probabilities, axis=1)
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
def predict_proba(self, X, **kwargs):
    """Predict probabilities for test data X.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Test vectors, where n_samples is the number
            of samples and n_features is the number of features.
            A 1-D array is treated as a single observation.

        **kwargs: additional parameters to be passed to
                self.cook_test_set

    Returns:

        probability estimates for test data: {array-like}
            one row per observation (a single row for 1-D `X`), each row
            divided by its sum

    Raises:

        ValueError: if `self.family` is not "logit", "expit" or "erf"

    """
    single_observation = len(X.shape) == 1

    if single_observation:
        n_features = X.shape[0]
        # A row of ones is appended so that cook_test_set receives a 2-D
        # input; it is dropped again before returning.
        new_X = mo.rbind(
            X.reshape(1, n_features),
            np.ones(n_features).reshape(1, n_features),
        )
        Z = self.cook_test_set(new_X, **kwargs)
    else:
        Z = self.cook_test_set(X, **kwargs)

    # beta_ is stored flat, class after class; the number of coefficients
    # per class is inferred rather than recomputed from X, which is not
    # possible for 1-D input.
    ZB = mo.safe_sparse_dot(Z, self.beta_.reshape(self.n_classes, -1).T)

    if self.family == "logit":
        unnormalized = np.exp(ZB)
    elif self.family == "expit":
        unnormalized = expit(ZB)
    elif self.family == "erf":
        unnormalized = 0.5 * (1 + erf(ZB))
    else:
        raise ValueError(
            "family must be 'logit', 'expit' or 'erf', got %r" % (self.family,)
        )

    probs = unnormalized / unnormalized.sum(axis=1)[:, None]

    return probs[:1] if single_observation else probs
Predict probabilities for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
probability estimates for test data: {array-like}
def score(self, X, y, scoring=None):
    """Scoring function for classification.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Test vectors, where n_samples is the number
            of samples and n_features is the number of features.

        y: array-like, shape = [n_samples]
            Target values.

        scoring: str
            scoring method (default is accuracy)

    Returns:

        score: float
            `None` when `scoring` is not one of the supported names

    """

    if scoring is None:
        scoring = "accuracy"

    # metrics computed from predicted labels
    label_metrics = {
        "accuracy": skm2.accuracy_score,
        "f1": skm2.f1_score,
        "precision": skm2.precision_score,
        "recall": skm2.recall_score,
        "roc_auc": skm2.roc_auc_score,
        "balanced_accuracy": skm2.balanced_accuracy_score,
        "average_precision": skm2.average_precision_score,
    }

    # metrics computed from predicted probabilities: (function, negate)
    proba_metrics = {
        "log_loss": (skm2.log_loss, False),
        "neg_brier_score": (skm2.brier_score_loss, True),
        "neg_log_loss": (skm2.log_loss, True),
    }

    if scoring in label_metrics:
        return label_metrics[scoring](y, self.predict(X))

    if scoring in proba_metrics:
        metric, negate = proba_metrics[scoring]
        value = metric(y, self.predict_proba(X))
        return -value if negate else value

    return None
Scoring function for classification.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
scoring: str
scoring method (default is accuracy)
Returns:
score: float
class GLMRegressor(GLM, RegressorMixin):
    """Generalized 'linear' models using quasi-randomized networks (regression)

    Parameters:

        n_hidden_features: int
            number of nodes in the hidden layer

        lambda1: float
            regularization parameter for GLM coefficients on original features

        alpha1: float
            controls compromise between l1 and l2 norm of GLM coefficients on original features

        lambda2: float
            regularization parameter for GLM coefficients on nonlinear features

        alpha2: float
            controls compromise between l1 and l2 norm of GLM coefficients on nonlinear features

        family: str
            "gaussian", "laplace" or "poisson" (for now)

        activation_name: str
            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

        a: float
            hyperparameter for 'prelu' or 'elu' activation function

        nodes_sim: str
            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
            'uniform'

        bias: boolean
            indicates if the hidden layer contains a bias term (True) or not
            (False)

        dropout: float
            regularization parameter; (random) percentage of nodes dropped out
            of the training

        direct_link: boolean
            indicates if the original predictors are included (True) in model's
            fitting or not (False)

        n_clusters: int
            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
                no clustering)

        cluster_encode: bool
            defines how the variable containing clusters is treated (default is one-hot)
            if `False`, then labels are used, without one-hot encoding

        type_clust: str
            type of clustering method: currently k-means ('kmeans') or Gaussian
            Mixture Model ('gmm')

        type_scaling: a tuple of 3 strings
            scaling methods for inputs, hidden layer, and clustering respectively
            (and when relevant).
            Currently available: standardization ('std') or MinMax scaling ('minmax')

        optimizer: object
            optimizer, from class nnetsauce.utils.Optimizer. The default
            instance is created once, when the class is defined, and is
            therefore shared by every estimator built without an explicit
            optimizer.

        seed: int
            reproducibility seed for nodes_sim=='uniform'

    Attributes:

        beta_: vector
            regression coefficients

    Examples:

    See [https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_regression.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_regression.py)

    """

    # construct the object -----

    def __init__(
        self,
        n_hidden_features=5,
        lambda1=0.01,
        alpha1=0.5,
        lambda2=0.01,
        alpha2=0.5,
        family="gaussian",
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        optimizer=Optimizer(),
        seed=123,
    ):
        super().__init__(
            n_hidden_features=n_hidden_features,
            lambda1=lambda1,
            alpha1=alpha1,
            lambda2=lambda2,
            alpha2=alpha2,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            optimizer=optimizer,
            seed=seed,
        )

        self.family = family

    def gaussian_loss(self, y, row_index, XB):
        """Half the mean squared difference between y[row_index] and XB."""
        return 0.5 * np.mean(np.square(y[row_index] - XB))

    def laplace_loss(self, y, row_index, XB):
        """Half the mean absolute difference between y[row_index] and XB."""
        return 0.5 * np.mean(np.abs(y[row_index] - XB))

    def poisson_loss(self, y, row_index, XB):
        """Negative mean of y[row_index] * XB - exp(XB)."""
        return -np.mean(y[row_index] * XB - np.exp(XB))

    def loss_func(
        self, beta, group_index, X, y, row_index=None, type_loss="gaussian", **kwargs
    ):
        """Penalized objective minimized by the optimizer.

        Args:

            beta: coefficient vector

            group_index: forwarded to `self.compute_penalty`

            X: design matrix

            y: responses

            row_index: rows on which the loss is evaluated (default: all)

            type_loss: str
                "gaussian", "laplace" or "poisson"

            **kwargs: accepted for interface compatibility, not used here

        Returns:

            loss plus penalty: float

        """
        losses = {
            "gaussian": self.gaussian_loss,
            "laplace": self.laplace_loss,
            "poisson": self.poisson_loss,
        }

        if row_index is None:
            row_index = range(len(y))
            XB = self.compute_XB(X, beta=beta)
        else:
            XB = self.compute_XB(X, beta=beta, row_index=row_index)

        return losses[type_loss](y, row_index, XB) + self.compute_penalty(
            group_index=group_index, beta=beta
        )

    def fit(self, X, y, **kwargs):
        """Fit GLM model to training data (X, y).

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            **kwargs: additional parameters to be passed to
                    self.cook_training_set and to self.optimizer.fit

        Returns:

            self: object

        """

        self.beta_ = None

        self.n_iter = 0

        # group_index is the number of original features
        _, self.group_index = X.shape

        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

        # initialization: least-squares solution
        beta_ = np.linalg.lstsq(scaled_Z, centered_y, rcond=None)[0]

        # optimization
        self.optimizer.fit(
            self.loss_func,
            response=centered_y,
            x0=beta_,
            group_index=self.group_index,
            X=scaled_Z,
            y=centered_y,
            type_loss=self.family,
            **kwargs
        )

        self.beta_ = self.optimizer.results[0]

        return self

    def predict(self, X, **kwargs):
        """Predict test data X.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Test vectors, where n_samples is the number
                of samples and n_features is the number of features.
                A 1-D array is treated as a single observation.

            **kwargs: additional parameters to be passed to
                    self.cook_test_set

        Returns:

            model predictions: {array-like}
                a single value when `X` is 1-D

        """

        if len(X.shape) == 1:
            n_features = X.shape[0]
            # A row of ones is appended so that cook_test_set receives a
            # 2-D input; only the prediction for the first row is returned.
            new_X = mo.rbind(
                X.reshape(1, n_features),
                np.ones(n_features).reshape(1, n_features),
            )

            return (
                self.y_mean_ + np.dot(self.cook_test_set(new_X, **kwargs), self.beta_)
            )[0]

        return self.y_mean_ + np.dot(self.cook_test_set(X, **kwargs), self.beta_)

    def score(self, X, y, scoring=None):
        """Compute the score of the model.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Test vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            scoring: str
                scoring method, resolved with `skm2.get_scorer`; when
                `None`, the root mean squared error is returned

        Returns:

            score: float

        """

        if scoring is None:
            return np.sqrt(np.mean((self.predict(X) - y) ** 2))

        return skm2.get_scorer(scoring)(self, X, y)
Generalized 'linear' models using quasi-randomized networks (regression)
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
lambda1: float
regularization parameter for GLM coefficients on original features
alpha1: float
controls compromise between l1 and l2 norm of GLM coefficients on original features
lambda2: float
regularization parameter for GLM coefficients on nonlinear features
alpha2: float
controls compromise between l1 and l2 norm of GLM coefficients on nonlinear features
family: str
"gaussian", "laplace" or "poisson" (for now)
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
optimizer: object
optimizer, from class Optimizer
seed: int
reproducibility seed for nodes_sim=='uniform'
Attributes:
beta_: vector
regression coefficients
Examples:
See https://github.com/Techtonique/nnetsauce/blob/master/examples/glm_regression.py
def fit(self, X, y, **kwargs):
    """Fit GLM model to training data (X, y).

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features.
            Must be 2D and expose a `.shape` attribute.

        y: array-like, shape = [n_samples]
            Target values.

        **kwargs: additional parameters, forwarded to both
            self.cook_training_set and self.optimizer.fit

    Returns:

        self: object

    """

    # Reset whatever a previous call to fit left behind
    self.beta_ = None
    self.n_iter = 0

    # The number of original features is forwarded to the optimizer as
    # `group_index`; the unpacking also rejects inputs that are not 2D
    _, self.group_index = X.shape

    centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

    # Starting point of the optimization: the least squares solution
    beta_init = np.linalg.lstsq(scaled_Z, centered_y, rcond=None)[0]

    # The optimizer receives the loss function first; the remaining keyword
    # arguments are handed over as-is together with **kwargs
    self.optimizer.fit(
        self.loss_func,
        response=centered_y,
        x0=beta_init,
        group_index=self.group_index,
        X=scaled_Z,
        y=centered_y,
        type_loss=self.family,
        **kwargs
    )

    self.beta_ = self.optimizer.results[0]

    return self
Fit GLM model to training data (X, y).
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.optimizer.fit
Returns:
self: object
def predict(self, X, **kwargs):
    """Predict test data X.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Test vectors, where n_samples is the number
            of samples and n_features is the number of features.
            A 1D array is treated as a single observation.

        **kwargs: additional parameters to be passed to
            self.cook_test_set

    Returns:

        model predictions: {array-like}; a single value when X is 1D

    """

    single_observation = len(X.shape) == 1

    if single_observation:
        n_features = X.shape[0]
        # The observation is combined with a row of ones through mo.rbind
        # (presumably a row-wise stack -- confirm against the helper) so
        # that cook_test_set receives a 2D input; only the first
        # prediction is returned below.
        X = mo.rbind(
            X.reshape(1, n_features),
            np.ones(n_features).reshape(1, n_features),
        )

    cooked = self.cook_test_set(X, **kwargs)
    predictions = self.y_mean_ + np.dot(cooked, self.beta_)

    if single_observation:
        return predictions[0]

    return predictions
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Test vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
def score(self, X, y, scoring=None):
    """Compute the score of the model.

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Vectors to score, where n_samples is the number
            of samples and n_features is the number of features.

        y: array-like, shape = [n_samples]
            Target values.

        scoring: str
            scoring method, looked up with skm2.get_scorer; when None,
            the root mean squared error of self.predict(X) is returned

    Returns:

        score: float

    """

    if scoring is not None:
        scorer = skm2.get_scorer(scoring)
        return scorer(self, X, y)

    # Default: root mean squared error
    residuals = self.predict(X) - y
    return np.sqrt(np.mean(residuals**2))
Compute the score of the model.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
scoring: str
scoring method
Returns:
score: float
class KernelRidge(BaseEstimator, RegressorMixin):
    """
    Kernel Ridge Regression with optional GPU support, Matérn kernels, and
    automatic input standardization.

    Parameters:

        alpha: float, or list / numpy array of floats
            Regularization parameter. When a list or array is given, `fit`
            solves the problem for each value and keeps the one with the
            smallest leave-one-out error.

        kernel: str
            Kernel type ("linear", "rbf", or "matern").

        gamma: float
            Kernel coefficient for "rbf". Ignored for other kernels.
            When None, 1 / n_features is used.

        nu: float
            Smoothness parameter for the Matérn kernel. Default is 1.5.

        length_scale: float
            Length scale parameter for the Matérn kernel. Default is 1.0.

        backend: str
            "cpu" or "gpu" (uses JAX if "gpu").

    Attributes:

        alpha_: float
            regularization parameter in use (the selected one when `alpha`
            is a list or array)

        gamma_: float
            RBF coefficient used by the last RBF kernel evaluation

        dual_coef_: array
            dual coefficients of the fitted model

        X_fit_, y_fit_, K_, y_mean_:
            standardized training inputs, centered responses, training
            kernel matrix and mean of the training responses

        alphas_, dual_coefs_, looe_:
            only when `alpha` is a list or array: candidate values, their
            dual coefficients and their leave-one-out errors
    """

    def __init__(
        self,
        alpha=1.0,
        kernel="rbf",
        gamma=None,
        nu=1.5,
        length_scale=1.0,
        backend="cpu",
    ):
        self.alpha = alpha
        self.alpha_ = alpha
        self.kernel = kernel
        self.gamma = gamma
        self.nu = nu
        self.length_scale = length_scale
        self.backend = backend
        self.scaler = StandardScaler()

        if backend == "gpu" and not JAX_AVAILABLE:
            raise ImportError(
                "JAX is not installed. Please install JAX to use the GPU backend."
            )

    def _linear_kernel(self, X, Y):
        """Return the matrix of inner products between rows of X and Y."""
        return jnp.dot(X, Y.T) if self.backend == "gpu" else np.dot(X, Y.T)

    def _rbf_kernel(self, X, Y):
        """Return exp(-gamma * squared Euclidean distance) for rows of X, Y."""
        # The constructor parameter is left untouched: overwriting
        # self.gamma would freeze the 1 / n_features default of the first
        # data set seen and reuse it for any later refit.
        gamma = 1.0 / X.shape[1] if self.gamma is None else self.gamma
        self.gamma_ = gamma

        xp = jnp if self.backend == "gpu" else np
        sq_dists = (
            xp.sum(X**2, axis=1)[:, None]
            + xp.sum(Y**2, axis=1)
            - 2 * xp.dot(X, Y.T)
        )
        return xp.exp(-gamma * sq_dists)

    def _matern_kernel(self, X, Y):
        """
        Compute the Matérn kernel using JAX for GPU or NumPy for CPU.

        Parameters:
        - X: array-like, shape (n_samples_X, n_features)
        - Y: array-like, shape (n_samples_Y, n_features)

        Returns:
        - Kernel matrix, shape (n_samples_X, n_samples_Y)
        """
        # Imported before branching on the backend: an import statement
        # inside only one branch makes these names local to the function,
        # leaving them unbound (UnboundLocalError) in the other branch.
        from scipy.special import gammaln, kv

        xp = jnp if self.backend == "gpu" else np

        # Pairwise Euclidean distances
        dists = xp.sqrt(xp.sum((X[:, None, :] - Y[None, :, :]) ** 2, axis=2))
        scaled_dists = xp.sqrt(2 * self.nu) * dists / self.length_scale

        # Matérn kernel formula
        coeff = (2 ** (1 - self.nu)) / xp.exp(gammaln(self.nu))
        matern_kernel = coeff * (scaled_dists**self.nu) * kv(self.nu, scaled_dists)

        # The formula is undefined at distance 0, where the kernel equals 1
        return xp.where(dists == 0, 1.0, matern_kernel)

    def _get_kernel(self, X, Y):
        """Dispatch to the kernel named by self.kernel.

        Raises:
        - ValueError when self.kernel is not "linear", "rbf" or "matern".
        """
        if self.kernel == "linear":
            return self._linear_kernel(X, Y)
        elif self.kernel == "rbf":
            return self._rbf_kernel(X, Y)
        elif self.kernel == "matern":
            return self._matern_kernel(X, Y)
        else:
            raise ValueError(f"Unsupported kernel: {self.kernel}")

    def fit(self, X, y):
        """
        Fit the Kernel Ridge Regression model.

        Parameters:
        - X: array-like, shape (n_samples, n_features)
            Training data.
        - y: array-like, shape (n_samples,)
            Target values.

        Returns:
        - self: object
        """
        # Standardize the inputs
        X = self.scaler.fit_transform(X)
        self.X_fit_ = X

        # Center the response
        self.y_mean_ = np.mean(y)
        y_centered = y - self.y_mean_

        n_samples = X.shape[0]

        # Compute the kernel matrix
        K = self._get_kernel(X, X)
        self.K_ = K
        self.y_fit_ = y_centered

        if isinstance(self.alpha, (list, np.ndarray)):
            # Several candidates: compute the leave-one-out error (LOOE)
            # of each one, always with NumPy
            self.alphas_ = self.alpha
            self.dual_coefs_ = []
            self.looe_ = []

            for alpha in self.alpha:
                G = K + alpha * np.eye(n_samples)
                G_inv = np.linalg.inv(G)
                diag_G_inv = np.diag(G_inv)
                dual_coef = np.linalg.solve(G, y_centered)
                looe = np.sum((dual_coef / diag_G_inv) ** 2)
                self.dual_coefs_.append(dual_coef)
                self.looe_.append(looe)

            # Keep the candidate with the smallest LOOE
            best_index = np.argmin(self.looe_)
            self.alpha_ = self.alpha[best_index]
            self.dual_coef_ = self.dual_coefs_[best_index]
        else:
            # Single regularization value: one linear solve
            if self.backend == "gpu":
                self.dual_coef_ = jnp.linalg.solve(
                    K + self.alpha * jnp.eye(n_samples), y_centered
                )
            else:
                self.dual_coef_ = np.linalg.solve(
                    K + self.alpha * np.eye(n_samples), y_centered
                )

        return self

    def predict(self, X, probs=False):
        """
        Predict using the Kernel Ridge Regression model.

        Parameters:
        - X: array-like, shape (n_samples, n_features)
            Test data.
        - probs: bool
            When True, a softmax of the product between the predictions
            and the transposed training inputs is returned instead of the
            predictions.

        Returns:
        - Predicted values, shape (n_samples,).
        """
        # Standardize the inputs
        X = self.scaler.transform(X)
        K = self._get_kernel(X, self.X_fit_)
        if self.backend == "gpu":
            preds = jnp.dot(K, self.dual_coef_) + self.y_mean_
            if probs:
                # NOTE(review): preds has one entry per test sample while
                # X_fit_.T has one row per feature, so this product only
                # works when n_samples == n_features, and softmax(axis=1)
                # needs a 2D input -- the intended formula should be
                # confirmed before relying on probs=True.
                similarities = jnp.dot(preds, self.X_fit_.T)
                return jaxsoftmax(similarities, axis=1)
            return preds
        else:
            preds = np.dot(K, self.dual_coef_) + self.y_mean_
            if probs:
                # NOTE(review): same shape concern as in the GPU branch
                similarities = np.dot(preds, self.X_fit_.T)
                return softmax(similarities, axis=1)
            return preds

    def partial_fit(self, X, y):
        """
        Incrementally fit the Kernel Ridge Regression model with new data using a recursive approach.

        The first call only initializes the model: the scaler is fitted,
        the batch and its kernel matrix are stored, the response mean is
        computed, and the dual coefficients are set to zeros. Each later
        call appends one dual coefficient per new observation.

        Parameters:
        - X: array-like, shape (n_samples, n_features)
            New training data.
        - y: array-like, shape (n_samples,)
            New target values.

        Returns:
        - self: object
            The updated model.
        """
        # The scaler is fitted on the first batch only and reused afterwards
        X = (
            self.scaler.fit_transform(X)
            if not hasattr(self, "X_fit_")
            else self.scaler.transform(X)
        )

        if not hasattr(self, "X_fit_"):
            # Initialize with the first batch of data
            self.X_fit_ = X

            # Center the response; this mean is kept for all later batches
            self.y_mean_ = np.mean(y)
            y_centered = y - self.y_mean_
            self.y_fit_ = y_centered

            n_samples = X.shape[0]

            # Compute the kernel matrix for the initial data
            self.K_ = self._get_kernel(X, X)

            # Initialize dual coefficients for each alpha
            if isinstance(self.alpha, (list, np.ndarray)):
                self.dual_coefs_ = [np.zeros(n_samples) for _ in self.alpha]
            else:
                self.dual_coef_ = np.zeros(n_samples)
        else:
            # Incrementally update with new data, one observation at a time
            y_centered = y - self.y_mean_
            for x_new, y_new in zip(X, y_centered):
                x_new = x_new.reshape(1, -1)  # Ensure x_new is 2D
                k_new = self._get_kernel(self.X_fit_, x_new).flatten()

                # Kernel value of the new point with itself
                k_self = self._get_kernel(x_new, x_new).item()

                if isinstance(self.alpha, (list, np.ndarray)):
                    # Update dual coefficients for each alpha
                    for idx, alpha in enumerate(self.alpha):
                        gamma_new = 1 / (k_self + alpha)
                        residual = y_new - np.dot(self.dual_coefs_[idx], k_new)
                        self.dual_coefs_[idx] = np.append(
                            self.dual_coefs_[idx], gamma_new * residual
                        )
                else:
                    # Update dual coefficients for a single alpha
                    gamma_new = 1 / (k_self + self.alpha)
                    residual = y_new - np.dot(self.dual_coef_, k_new)
                    self.dual_coef_ = np.append(self.dual_coef_, gamma_new * residual)

                # Grow the kernel matrix by one row and one column
                self.K_ = np.block(
                    [[self.K_, k_new[:, None]], [k_new[None, :], np.array([[k_self]])]]
                )

                # Update the stored data
                self.X_fit_ = np.vstack([self.X_fit_, x_new])
                self.y_fit_ = np.append(self.y_fit_, y_new)

        # Select the best alpha based on LOOE after the batch
        if isinstance(self.alpha, (list, np.ndarray)):
            self.looe_ = []
            for idx, alpha in enumerate(self.alpha):
                G = self.K_ + alpha * np.eye(self.K_.shape[0])
                G_inv = np.linalg.inv(G)
                diag_G_inv = np.diag(G_inv)
                looe = np.sum((self.dual_coefs_[idx] / diag_G_inv) ** 2)
                self.looe_.append(looe)

            # Select the best alpha
            best_index = np.argmin(self.looe_)
            self.alpha_ = self.alpha[best_index]
            self.dual_coef_ = self.dual_coefs_[best_index]

        return self
Kernel Ridge Regression with optional GPU support, Matérn kernels, and automatic input standardization.
Parameters:
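- alpha: float, or list/array of floats Regularization parameter. When a list or array is given, the value with the smallest leave-one-out error is selected.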
- kernel: str Kernel type ("linear", "rbf", or "matern").
- gamma: float Kernel coefficient for "rbf". Ignored for other kernels.
- nu: float Smoothness parameter for the Matérn kernel. Default is 1.5.
- length_scale: float Length scale parameter for the Matérn kernel. Default is 1.0.
- backend: str "cpu" or "gpu" (uses JAX if "gpu").
def fit(self, X, y):
    """
    Fit the Kernel Ridge Regression model.

    Parameters:
    - X: array-like, shape (n_samples, n_features)
        Training data.
    - y: array-like, shape (n_samples,)
        Target values.

    Returns:
    - self: object
    """
    # Inputs are standardized, the response is centered
    X_std = self.scaler.fit_transform(X)
    self.X_fit_ = X_std

    self.y_mean_ = np.mean(y)
    y_centered = y - self.y_mean_

    n_samples = X_std.shape[0]

    gram = self._get_kernel(X_std, X_std)
    self.K_ = gram
    self.y_fit_ = y_centered

    if not isinstance(self.alpha, (list, np.ndarray)):
        # Single regularization value: one linear solve on the chosen backend
        if self.backend == "gpu":
            self.dual_coef_ = jnp.linalg.solve(
                gram + self.alpha * jnp.eye(n_samples), y_centered
            )
        else:
            self.dual_coef_ = np.linalg.solve(
                gram + self.alpha * np.eye(n_samples), y_centered
            )
        return self

    # Several candidates: keep the one with the smallest leave-one-out error
    self.alphas_ = self.alpha
    self.dual_coefs_ = []
    self.looe_ = []

    for candidate in self.alpha:
        regularized = gram + candidate * np.eye(n_samples)
        inverse_diagonal = np.diag(np.linalg.inv(regularized))
        coefficients = np.linalg.solve(regularized, y_centered)
        self.dual_coefs_.append(coefficients)
        self.looe_.append(np.sum((coefficients / inverse_diagonal) ** 2))

    winner = np.argmin(self.looe_)
    self.alpha_ = self.alpha[winner]
    self.dual_coef_ = self.dual_coefs_[winner]

    return self
Fit the Kernel Ridge Regression model.
Parameters:
- X: array-like, shape (n_samples, n_features) Training data.
- y: array-like, shape (n_samples,) Target values.
def predict(self, X, probs=False):
    """
    Predict using the Kernel Ridge Regression model.

    Parameters:
    - X: array-like, shape (n_samples, n_features)
        Test data.
    - probs: bool
        When True, a softmax of the product between the predictions and
        the transposed training inputs is returned instead of the
        predictions.

    Returns:
    - Predicted values, shape (n_samples,).
    """
    on_gpu = self.backend == "gpu"

    # Test inputs go through the scaler fitted on the training data
    X_std = self.scaler.transform(X)
    gram = self._get_kernel(X_std, self.X_fit_)

    if on_gpu:
        product = jnp.dot
    else:
        product = np.dot

    preds = product(gram, self.dual_coef_) + self.y_mean_

    if not probs:
        return preds

    # Product with the stored training inputs, then softmax along axis 1
    similarities = product(preds, self.X_fit_.T)

    if on_gpu:
        return jaxsoftmax(similarities, axis=1)

    return softmax(similarities, axis=1)
Predict using the Kernel Ridge Regression model.
Parameters:
- X: array-like, shape (n_samples, n_features) Test data.
Returns:
- Predicted values, shape (n_samples,).
class LazyClassifier(LazyDeepClassifier):
    """
    Fitting -- almost -- all the classification algorithms with
    nnetsauce's CustomClassifier and returning their scores (no layers).

    Parameters:

        verbose: int, optional (default=0)
            Any positive number for verbosity.

        ignore_warnings: bool, optional (default=True)
            When set to True, the warnings related to algorithms that are
            not able to run are ignored.

        custom_metric: function, optional (default=None)
            When a function is provided, models are evaluated based on the
            custom evaluation metric provided.

        predictions: bool, optional (default=False)
            When set to True, the predictions of all the models are
            returned as a dataframe.

        sort_by: string, optional (default='Accuracy')
            Sort models by a metric. Available options are 'Accuracy',
            'Balanced Accuracy', 'ROC AUC', 'F1 Score' or a custom metric
            identified by its name and provided by custom_metric.

        random_state: int, optional (default=42)
            Reproducibility seed.

        estimators: list, optional (default='all')
            list of Estimators names or just 'all' (default='all')

        preprocess: bool
            preprocessing is done when set to True

        n_jobs: int, when possible, run in parallel
            For now, only used by individual models that support it.

        All the other parameters are the same as CustomClassifier's.

    Attributes:

        models_: dict-object
            Returns a dictionary with each model pipeline as value
            with key as name of models.

        best_model_: object
            Returns the best model pipeline based on the sort_by metric.

    Examples:

        import nnetsauce as ns
        import numpy as np
        from sklearn import datasets
        from sklearn.utils import shuffle

        dataset = datasets.load_iris()
        X = dataset.data
        y = dataset.target
        X, y = shuffle(X, y, random_state=123)
        X = X.astype(np.float32)
        y = y.astype(np.float32)
        X_train, X_test = X[:100], X[100:]
        y_train, y_test = y[:100], y[100:]

        clf = ns.LazyClassifier(verbose=0, ignore_warnings=True, custom_metric=None)
        models, predictions = clf.fit(X_train, X_test, y_train, y_test)
        model_dictionary = clf.provide_models(X_train,X_test,y_train,y_test)
        print(models)

    """

    def __init__(
        self,
        verbose=0,
        ignore_warnings=True,
        custom_metric=None,
        predictions=False,
        sort_by="Accuracy",
        random_state=42,
        estimators="all",
        preprocess=False,
        n_jobs=None,
        # CustomClassifier attributes
        obj=None,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        col_sample=1,
        row_sample=1,
        seed=123,
        backend="cpu",
    ):
        # Options of the model-comparison loop itself
        benchmark_options = dict(
            verbose=verbose,
            ignore_warnings=ignore_warnings,
            custom_metric=custom_metric,
            predictions=predictions,
            sort_by=sort_by,
            random_state=random_state,
            estimators=estimators,
            preprocess=preprocess,
            n_jobs=n_jobs,
        )

        # Options forwarded unchanged for the CustomClassifier wrappers
        custom_options = dict(
            obj=obj,
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            col_sample=col_sample,
            row_sample=row_sample,
            seed=seed,
            backend=backend,
        )

        # The only difference with the parent class: n_layers is fixed to 1
        super().__init__(n_layers=1, **benchmark_options, **custom_options)
Fitting -- almost -- all the classification algorithms with nnetsauce's CustomClassifier and returning their scores (no layers).
Parameters:
verbose: int, optional (default=0)
Any positive number for verbosity.
ignore_warnings: bool, optional (default=True)
When set to True, the warnings related to algorithms that are not able to run are ignored.
custom_metric: function, optional (default=None)
When function is provided, models are evaluated based on the custom evaluation metric provided.
predictions: bool, optional (default=False)
When set to True, the predictions of all the models are returned as a dataframe.
sort_by: string, optional (default='Accuracy')
Sort models by a metric. Available options are 'Accuracy', 'Balanced Accuracy', 'ROC AUC', 'F1 Score'
or a custom metric identified by its name and provided by custom_metric.
random_state: int, optional (default=42)
Reproducibility seed.
estimators: list, optional (default='all')
list of Estimators names or just 'all' (default='all')
preprocess: bool
preprocessing is done when set to True
n_jobs : int, when possible, run in parallel
For now, only used by individual models that support it.
All the other parameters are the same as CustomClassifier's.
Attributes:
models_: dict-object
Returns a dictionary with each model pipeline as value
with key as name of models.
best_model_: object
Returns the best model pipeline based on the sort_by metric.
Examples:
import nnetsauce as ns
import numpy as np
from sklearn import datasets
from sklearn.utils import shuffle
dataset = datasets.load_iris()
X = dataset.data
y = dataset.target
X, y = shuffle(X, y, random_state=123)
X = X.astype(np.float32)
y = y.astype(np.float32)
X_train, X_test = X[:100], X[100:]
y_train, y_test = y[:100], y[100:]
clf = ns.LazyClassifier(verbose=0, ignore_warnings=True, custom_metric=None)
models, predictions = clf.fit(X_train, X_test, y_train, y_test)
model_dictionary = clf.provide_models(X_train,X_test,y_train,y_test)
print(models)
class LazyRegressor(LazyDeepRegressor):
    """
    Fitting -- almost -- all the regression algorithms with
    nnetsauce's CustomRegressor and returning their scores.

    Parameters:

        verbose: int, optional (default=0)
            Any positive number for verbosity.

        ignore_warnings: bool, optional (default=True)
            When set to True, the warnings related to algorithms that are
            not able to run are ignored.

        custom_metric: function, optional (default=None)
            When a function is provided, models are evaluated based on the
            custom evaluation metric provided.

        predictions: bool, optional (default=False)
            When set to True, the predictions of all the models are
            returned as a dataframe.

        sort_by: string, optional (default='RMSE')
            Sort models by a metric. Available options are 'R-Squared',
            'Adjusted R-Squared', 'RMSE', 'Time Taken' and 'Custom Metric',
            or a custom metric identified by its name and provided by
            custom_metric.

        random_state: int, optional (default=42)
            Reproducibility seed.

        estimators: list, optional (default='all')
            list of Estimators names or just 'all' (default='all')

        preprocess: bool
            preprocessing is done when set to True

        n_jobs: int, when possible, run in parallel
            For now, only used by individual models that support it.

        All the other parameters are the same as CustomRegressor's.

    Attributes:

        models_: dict-object
            Returns a dictionary with each model pipeline as value
            with key as name of models.

        best_model_: object
            Returns the best model pipeline based on the sort_by metric.

    Examples:

        import nnetsauce as ns
        import numpy as np
        from sklearn import datasets
        from sklearn.utils import shuffle

        diabetes = datasets.load_diabetes()
        X, y = shuffle(diabetes.data, diabetes.target, random_state=13)
        X = X.astype(np.float32)

        offset = int(X.shape[0] * 0.9)
        X_train, y_train = X[:offset], y[:offset]
        X_test, y_test = X[offset:], y[offset:]

        reg = ns.LazyRegressor(verbose=0, ignore_warnings=False,
                               custom_metric=None)
        models, predictions = reg.fit(X_train, X_test, y_train, y_test)
        print(models)

    """

    def __init__(
        self,
        verbose=0,
        ignore_warnings=True,
        custom_metric=None,
        predictions=False,
        sort_by="RMSE",
        random_state=42,
        estimators="all",
        preprocess=False,
        n_jobs=None,
        # CustomRegressor attributes
        obj=None,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        col_sample=1,
        row_sample=1,
        seed=123,
        backend="cpu",
    ):
        # Options of the model-comparison loop itself
        benchmark_options = dict(
            verbose=verbose,
            ignore_warnings=ignore_warnings,
            custom_metric=custom_metric,
            predictions=predictions,
            sort_by=sort_by,
            random_state=random_state,
            estimators=estimators,
            preprocess=preprocess,
            n_jobs=n_jobs,
        )

        # Options forwarded unchanged for the CustomRegressor wrappers
        custom_options = dict(
            obj=obj,
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            col_sample=col_sample,
            row_sample=row_sample,
            seed=seed,
            backend=backend,
        )

        # The only difference with the parent class: n_layers is fixed to 1
        super().__init__(n_layers=1, **benchmark_options, **custom_options)
Fitting -- almost -- all the regression algorithms with nnetsauce's CustomRegressor and returning their scores.
Parameters:
verbose: int, optional (default=0)
Any positive number for verbosity.
ignore_warnings: bool, optional (default=True)
When set to True, the warnings related to algorithms that are not able to run are ignored.
custom_metric: function, optional (default=None)
When function is provided, models are evaluated based on the custom evaluation metric provided.
predictions: bool, optional (default=False)
When set to True, the predictions of all the models are returned as a dataframe.
sort_by: string, optional (default='RMSE')
Sort models by a metric. Available options are 'R-Squared', 'Adjusted R-Squared', 'RMSE', 'Time Taken' and 'Custom Metric',
or a custom metric identified by its name and provided by custom_metric.
random_state: int, optional (default=42)
Reproducibility seed.
estimators: list, optional (default='all')
list of Estimators names or just 'all' (default='all')
preprocess: bool
preprocessing is done when set to True
n_jobs : int, when possible, run in parallel
For now, only used by individual models that support it.
All the other parameters are the same as CustomRegressor's.
Attributes:
models_: dict-object
Returns a dictionary with each model pipeline as value
with key as name of models.
best_model_: object
Returns the best model pipeline based on the sort_by metric.
Examples:
import nnetsauce as ns
import numpy as np
from sklearn import datasets
from sklearn.utils import shuffle
diabetes = datasets.load_diabetes()
X, y = shuffle(diabetes.data, diabetes.target, random_state=13)
X = X.astype(np.float32)
offset = int(X.shape[0] * 0.9)
X_train, y_train = X[:offset], y[:offset]
X_test, y_test = X[offset:], y[offset:]
reg = ns.LazyRegressor(verbose=0, ignore_warnings=False,
custom_metric=None)
models, predictions = reg.fit(X_train, X_test, y_train, y_test)
print(models)
class LazyDeepClassifier(Custom, ClassifierMixin):
    """

    Fitting -- almost -- all the classification algorithms with layers of
    nnetsauce's CustomClassifier and returning their scores.

    Parameters:

        verbose: int, optional (default=0)
            Any positive number for verbosity.

        ignore_warnings: bool, optional (default=True)
            When set to True, warnings related to algorithms that are not
            able to run are ignored.

        custom_metric: function, optional (default=None)
            When function is provided, models are evaluated based on the custom
            evaluation metric provided. It is called as
            `custom_metric(y_test, y_pred)`.

        predictions: bool, optional (default=False)
            When set to True, the predictions of all the models are
            returned along with the scores.

        sort_by: string, optional (default='Accuracy')
            Sort models by a metric. Available options are 'Accuracy',
            'Balanced Accuracy', 'ROC AUC', 'F1 Score' or a custom metric
            identified by its name and provided by custom_metric
            ('Custom metric' is accepted too).

        random_state: int, optional (default=42)
            Reproducibility seed.

        estimators: list, optional (default='all')
            list of Estimators names or just 'all' for > 90 classifiers
            (default='all')

        preprocess: bool, preprocessing is done when set to True

        n_jobs: int, when possible, run in parallel
            For now, only used by individual models that support it.

        n_layers: int, optional (default=3)
            Number of layers of CustomClassifiers to be used.

        All the other parameters are the same as CustomClassifier's.

    Attributes:

        models_: dict-object
            Returns a dictionary with each model pipeline as value
            with key as name of models.

        best_model_: object
            Returns the best model pipeline.

    Examples

        ```python
        import nnetsauce as ns
        from sklearn.datasets import load_breast_cancer
        from sklearn.model_selection import train_test_split
        data = load_breast_cancer()
        X = data.data
        y= data.target
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2,
            random_state=123)
        clf = ns.LazyDeepClassifier(verbose=0, ignore_warnings=True,
                                    custom_metric=None, predictions=True)
        models, predictions = clf.fit(X_train, X_test, y_train, y_test)
        model_dictionary = clf.provide_models(X_train,X_test,y_train,y_test)
        print(models)
        ```

    """

    def __init__(
        self,
        verbose=0,
        ignore_warnings=True,
        custom_metric=None,
        predictions=False,
        sort_by="Accuracy",
        random_state=42,
        estimators="all",
        preprocess=False,
        n_jobs=None,
        # Defining depth
        n_layers=3,
        # CustomClassifier attributes
        obj=None,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        col_sample=1,
        row_sample=1,
        seed=123,
        backend="cpu",
    ):
        self.verbose = verbose
        self.ignore_warnings = ignore_warnings
        self.custom_metric = custom_metric
        self.predictions = predictions
        self.sort_by = sort_by
        self.models_ = {}
        self.best_model_ = None
        self.random_state = random_state
        self.estimators = estimators
        self.preprocess = preprocess
        # Stored as the number of *additional* wrappers stacked on top of the
        # first CustomClassifier layer (see `_build_layered_classifier`).
        # NOTE(review): storing a value different from the constructor
        # argument means get_params()/clone() round-trips would decrement it
        # again -- left unchanged because code outside this class may read it.
        self.n_layers = n_layers - 1
        self.n_jobs = n_jobs
        super().__init__(
            obj=obj,
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            col_sample=col_sample,
            row_sample=row_sample,
            seed=seed,
            backend=backend,
        )

    def _new_layer(self, obj):
        """Wrap `obj` in a CustomClassifier using this instance's settings."""
        return CustomClassifier(
            obj=obj,
            n_hidden_features=self.n_hidden_features,
            activation_name=self.activation_name,
            a=self.a,
            nodes_sim=self.nodes_sim,
            bias=self.bias,
            dropout=self.dropout,
            direct_link=self.direct_link,
            n_clusters=self.n_clusters,
            cluster_encode=self.cluster_encode,
            type_clust=self.type_clust,
            type_scaling=self.type_scaling,
            col_sample=self.col_sample,
            row_sample=self.row_sample,
            seed=self.seed,
            backend=self.backend,
        )

    def _build_layered_classifier(self, name, model, X_train, y_train):
        """Instantiate `model` and stack CustomClassifier layers on top of it."""
        # Probe the estimator's parameters once to learn which optional
        # keyword arguments its constructor accepts.
        params = model().get_params()
        base_kwargs = {}
        if "random_state" in params:
            base_kwargs["random_state"] = self.random_state
        # Forward n_jobs only when the user set it, so the default behaviour
        # is untouched. LogisticRegression is excluded by name, as in the
        # original probing code (the reason is not visible in this file).
        if (
            self.n_jobs is not None
            and "n_jobs" in params
            and "LogisticRegression" not in name
        ):
            base_kwargs["n_jobs"] = self.n_jobs

        layer_clf = self._new_layer(model(**base_kwargs))

        # NOTE(review): this first fit happens on the raw training data even
        # when `preprocess` is True; presumably redundant with the final fit
        # of the stacked model -- kept to preserve behaviour, confirm.
        layer_clf.fit(X_train, y_train)

        for _ in range(self.n_layers):
            layer_clf = deepcopy(self._new_layer(layer_clf))

        return layer_clf

    def _score_model(
        self, name, fitted_model, X_test, y_test, start, results, predictions
    ):
        """Register a fitted model, score it on the test set, record the row."""
        self.models_[name] = fitted_model
        y_pred = fitted_model.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred, normalize=True)
        b_accuracy = balanced_accuracy_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred, average="weighted")
        try:
            roc_auc = roc_auc_score(y_test, y_pred)
        except Exception as exception:
            roc_auc = None
            if self.ignore_warnings is False:
                print("ROC AUC couldn't be calculated for " + name)
                print(exception)

        # Timing excludes the custom metric, as before.
        elapsed = time.time() - start

        row = {
            "Model": name,
            "Accuracy": accuracy,
            "Balanced Accuracy": b_accuracy,
            "ROC AUC": roc_auc,
            "F1 Score": f1,
        }
        if self.custom_metric is not None:
            # Evaluated BEFORE anything is appended: if the metric raises,
            # no column is left longer than the others.
            custom_value = self.custom_metric(y_test, y_pred)
            row["Custom metric"] = custom_value
        row["Time Taken"] = elapsed

        for column, value in row.items():
            results[column].append(value)

        if self.verbose > 0:
            report = {
                "Model": name,
                "Accuracy": accuracy,
                "Balanced Accuracy": b_accuracy,
                "ROC AUC": roc_auc,
                "F1 Score": f1,
            }
            if self.custom_metric is not None:
                report[self.custom_metric.__name__] = custom_value
            report["Time taken"] = time.time() - start
            print(report)

        if self.predictions:
            predictions[name] = y_pred

    def fit(self, X_train, X_test, y_train, y_test):
        """Fit classifiers to X_train and y_train, predict and score on X_test,
        y_test.

        Parameters:

            X_train: array-like,
                Training vectors, where rows is the number of samples
                and columns is the number of features.

            X_test: array-like,
                Testing vectors, where rows is the number of samples
                and columns is the number of features.

            y_train: array-like,
                Target labels for the training samples.

            y_test: array-like,
                Target labels for the testing samples.

        Returns:

            scores: Pandas DataFrame
                Metrics of all the models, indexed by model name and sorted
                in descending order of `sort_by`.

            predictions: dict
                Maps each model name to its predictions on X_test. Only
                returned (as the second element of a tuple) when
                `predictions` is True.
        """
        # One list per output column; rows are appended atomically by
        # `_score_model` so all columns always have the same length.
        results = {
            "Model": [],
            "Accuracy": [],
            "Balanced Accuracy": [],
            "ROC AUC": [],
            "F1 Score": [],
        }
        if self.custom_metric is not None:
            results["Custom metric"] = []
        results["Time Taken"] = []
        predictions = {}

        if isinstance(X_train, np.ndarray):
            X_train = pd.DataFrame(X_train)
            X_test = pd.DataFrame(X_test)

        numeric_features = X_train.select_dtypes(include=[np.number]).columns
        categorical_features = X_train.select_dtypes(include=["object"]).columns

        categorical_low, categorical_high = get_card_split(
            X_train, categorical_features
        )

        preprocessor = None
        if self.preprocess is True:
            preprocessor = ColumnTransformer(
                transformers=[
                    ("numeric", numeric_transformer, numeric_features),
                    (
                        "categorical_low",
                        categorical_transformer_low,
                        categorical_low,
                    ),
                    (
                        "categorical_high",
                        categorical_transformer_high,
                        categorical_high,
                    ),
                ]
            )

        # baseline models (XGBoost is optional: fall back to the forest only)
        try:
            baseline_names = ["RandomForestClassifier", "XGBClassifier"]
            baseline_models = [RandomForestClassifier(), xgb.XGBClassifier()]
        except Exception:
            baseline_names = ["RandomForestClassifier"]
            baseline_models = [RandomForestClassifier()]

        for name, model in zip(baseline_names, baseline_models):
            start = time.time()
            try:
                model.fit(X_train, y_train)
                self._score_model(
                    name, model, X_test, y_test, start, results, predictions
                )
            except Exception as exception:
                if self.ignore_warnings is False:
                    print(name + " model failed to execute")
                    print(exception)

        if self.estimators == "all":
            self.classifiers = [
                item
                for sublist in [
                    DEEPCLASSIFIERS,
                    DEEPMULTITASKCLASSIFIERS,
                    DEEPSIMPLEMULTITASKCLASSIFIERS,
                ]
                for item in sublist
            ]
        else:
            self.classifiers = (
                [
                    ("DeepCustomClassifier(" + est[0] + ")", est[1])
                    for est in all_estimators()
                    if (
                        issubclass(est[1], ClassifierMixin)
                        and (est[0] in self.estimators)
                    )
                ]
                + [
                    (
                        "DeepMultitaskClassifier(" + est[0] + ")",
                        partial(MultitaskClassifier, obj=est[1]()),
                    )
                    for est in all_estimators()
                    if (
                        issubclass(est[1], RegressorMixin)
                        and (est[0] in self.estimators)
                    )
                ]
                + [
                    (
                        "DeepSimpleMultitaskClassifier(" + est[0] + ")",
                        partial(SimpleMultitaskClassifier, obj=est[1]()),
                    )
                    for est in all_estimators()
                    if (
                        issubclass(est[1], RegressorMixin)
                        and (est[0] in self.estimators)
                    )
                ]
            )

        for name, model in tqdm(self.classifiers):  # do parallel exec
            start = time.time()
            try:
                layer_clf = self._build_layered_classifier(
                    name, model, X_train, y_train
                )

                if preprocessor is not None:
                    fitted_model = Pipeline(
                        [
                            ("preprocessor", preprocessor),
                            ("classifier", layer_clf),
                        ]
                    )
                else:  # no preprocessing
                    fitted_model = layer_clf

                fitted_model.fit(X_train, y_train)
                self._score_model(
                    name,
                    fitted_model,
                    X_test,
                    y_test,
                    start,
                    results,
                    predictions,
                )
            except Exception as exception:
                if self.ignore_warnings is False:
                    print(name + " model failed to execute")
                    print(exception)

        scores = pd.DataFrame(results)

        # The results column is always called "Custom metric"; also accept
        # the metric's own name, as the class documentation promises.
        sort_column = self.sort_by
        if (
            sort_column not in scores.columns
            and self.custom_metric is not None
            and sort_column == getattr(self.custom_metric, "__name__", None)
        ):
            sort_column = "Custom metric"

        scores = scores.sort_values(by=sort_column, ascending=False).set_index(
            "Model"
        )

        # Every model may have failed; avoid an IndexError on an empty table.
        if len(scores.index) > 0:
            self.best_model_ = self.models_[scores.index[0]]
        else:
            self.best_model_ = None

        if self.predictions is True:

            return scores, predictions

        return scores

    def get_best_model(self):
        """
        This function returns the best model pipeline based on the sort_by metric.

        Returns:

            best_model: object,
                Returns the best model pipeline based on the sort_by metric
                (None if `fit` has not produced any scored model).

        """
        return self.best_model_

    def provide_models(self, X_train, X_test, y_train, y_test):
        """Returns all the model objects trained. If fit hasn't been called yet,
        then it's called to return the models.

        Parameters:

            X_train: array-like,
                Training vectors, where rows is the number of samples
                and columns is the number of features.

            X_test: array-like,
                Testing vectors, where rows is the number of samples
                and columns is the number of features.

            y_train: array-like,
                Target labels for the training samples.

            y_test: array-like,
                Target labels for the testing samples.

        Returns:

            models: dict-object,
                Returns a dictionary with each model's pipeline as value
                and key = name of the model.
        """
        if not self.models_:
            self.fit(X_train, X_test, y_train, y_test)

        return self.models_
Fitting -- almost -- all the classification algorithms with layers of nnetsauce's CustomClassifier and returning their scores.
Parameters:
verbose: int, optional (default=0)
Any positive number for verbosity.
ignore_warnings: bool, optional (default=True)
When set to True, warnings related to algorithms that are not
able to run are ignored.
custom_metric: function, optional (default=None)
When function is provided, models are evaluated based on the custom
evaluation metric provided.
predictions: bool, optional (default=False)
When set to True, the predictions of all the models are
returned as a data frame.
sort_by: string, optional (default='Accuracy')
Sort models by a metric. Available options are 'Accuracy',
'Balanced Accuracy', 'ROC AUC', 'F1 Score' or a custom metric
identified by its name and provided by custom_metric.
random_state: int, optional (default=42)
Reproducibility seed.
estimators: list, optional (default='all')
list of Estimators names or just 'all' for > 90 classifiers
(default='all')
preprocess: bool, preprocessing is done when set to True
n_jobs: int, when possible, run in parallel
For now, only used by individual models that support it.
n_layers: int, optional (default=3)
Number of layers of CustomClassifiers to be used.
All the other parameters are the same as CustomClassifier's.
Attributes:
models_: dict-object
Returns a dictionary with each model pipeline as value
with key as name of models.
best_model_: object
Returns the best model pipeline.
Examples
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
data = load_breast_cancer()
X = data.data
y= data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2,
random_state=123)
clf = ns.LazyDeepClassifier(verbose=0, ignore_warnings=True, custom_metric=None, predictions=True)
models, predictions = clf.fit(X_train, X_test, y_train, y_test)
model_dictionary = clf.provide_models(X_train,X_test,y_train,y_test)
print(models)
def fit(self, X_train, X_test, y_train, y_test):
    """Fit classifiers to X_train and y_train, predict and score on X_test,
    y_test.

    Baseline models (random forest and, when available, XGBoost) are fitted
    first, followed by every candidate wrapped in stacked CustomClassifier
    layers. A candidate that raises at any step is skipped (and reported
    when `ignore_warnings` is False).

    Parameters:

        X_train: array-like,
            Training vectors, where rows is the number of samples
            and columns is the number of features.

        X_test: array-like,
            Testing vectors, where rows is the number of samples
            and columns is the number of features.

        y_train: array-like,
            Training target values (class labels), one per row of X_train.

        y_test: array-like,
            Testing target values (class labels), one per row of X_test.

    Returns:

        scores: Pandas DataFrame
            Metrics of all the models that could be fitted, indexed by
            model name and sorted by `sort_by` (descending).

        predictions: dict
            Only returned when `predictions` is True: maps each model name
            to its predictions on X_test.
    """
    if isinstance(X_train, np.ndarray):
        X_train = pd.DataFrame(X_train)
        X_test = pd.DataFrame(X_test)

    numeric_features = X_train.select_dtypes(include=[np.number]).columns
    categorical_features = X_train.select_dtypes(include=["object"]).columns

    categorical_low, categorical_high = get_card_split(
        X_train, categorical_features
    )

    preprocessor = None
    if self.preprocess is True:
        preprocessor = ColumnTransformer(
            transformers=[
                ("numeric", numeric_transformer, numeric_features),
                (
                    "categorical_low",
                    categorical_transformer_low,
                    categorical_low,
                ),
                (
                    "categorical_high",
                    categorical_transformer_high,
                    categorical_high,
                ),
            ]
        )

    data = (X_train, X_test, y_train, y_test)
    rows = []  # one dict of metrics per successfully evaluated model
    predictions = {}

    # baseline models (XGBoost is optional: fall back to the forest alone)
    try:
        baselines = [
            ("RandomForestClassifier", RandomForestClassifier()),
            ("XGBClassifier", xgb.XGBClassifier()),
        ]
    except Exception:
        baselines = [("RandomForestClassifier", RandomForestClassifier())]

    for name, model in baselines:
        self._fit_and_score(
            name, lambda ready=model: ready, data, rows, predictions
        )

    if self.estimators == "all":
        self.classifiers = [
            *DEEPCLASSIFIERS,
            *DEEPMULTITASKCLASSIFIERS,
            *DEEPSIMPLEMULTITASKCLASSIFIERS,
        ]
    else:
        requested = [
            est for est in all_estimators() if est[0] in self.estimators
        ]
        self.classifiers = (
            [
                ("DeepCustomClassifier(" + est[0] + ")", est[1])
                for est in requested
                if issubclass(est[1], ClassifierMixin)
            ]
            + [
                (
                    "DeepMultitaskClassifier(" + est[0] + ")",
                    partial(MultitaskClassifier, obj=est[1]()),
                )
                for est in requested
                if issubclass(est[1], RegressorMixin)
            ]
            + [
                (
                    "DeepSimpleMultitaskClassifier(" + est[0] + ")",
                    partial(SimpleMultitaskClassifier, obj=est[1]()),
                )
                for est in requested
                if issubclass(est[1], RegressorMixin)
            ]
        )

    for name, model in tqdm(self.classifiers):  # do parallel exec
        self._fit_and_score(
            name,
            partial(self._make_deep_estimator, name, model, preprocessor),
            data,
            rows,
            predictions,
        )

    columns = ["Model", "Accuracy", "Balanced Accuracy", "ROC AUC", "F1 Score"]
    if self.custom_metric is not None:
        columns.append("Custom metric")
    columns.append("Time Taken")

    scores = pd.DataFrame(rows, columns=columns)
    scores = scores.sort_values(by=self.sort_by, ascending=False).set_index("Model")

    # When every model failed there is no best model to pick; leave
    # `best_model_` untouched instead of raising IndexError.
    if not scores.empty:
        self.best_model_ = self.models_[scores.index[0]]

    if self.predictions is True:
        return scores, predictions

    return scores


def _layer_params(self):
    """Return the CustomClassifier settings shared by every layer of a stack."""
    return {
        "n_hidden_features": self.n_hidden_features,
        "activation_name": self.activation_name,
        "a": self.a,
        "nodes_sim": self.nodes_sim,
        "bias": self.bias,
        "dropout": self.dropout,
        "direct_link": self.direct_link,
        "n_clusters": self.n_clusters,
        "cluster_encode": self.cluster_encode,
        "type_clust": self.type_clust,
        "type_scaling": self.type_scaling,
        "col_sample": self.col_sample,
        "row_sample": self.row_sample,
        "seed": self.seed,
        "backend": self.backend,
    }


def _make_deep_estimator(self, name, model, preprocessor):
    """Build the unfitted stack of CustomClassifier layers around `model`.

    `model` is a zero-argument-callable estimator factory (class or partial).
    When `preprocessor` is not None the stack is returned as the last step
    of a Pipeline, so that it is only ever fitted on preprocessed data.
    """
    supported = model().get_params()
    model_args = {}
    if "random_state" in supported:
        model_args["random_state"] = self.random_state
    # Forward n_jobs to base learners that accept it; LogisticRegression is
    # deliberately left out, as in the original probe.
    if "n_jobs" in supported and name.find("LogisticRegression") == -1:
        model_args["n_jobs"] = self.n_jobs

    layer_params = self._layer_params()
    layer_clf = CustomClassifier(obj=model(**model_args), **layer_params)

    # self.n_layers counts the wrapping layers added on top of the first one
    for _ in range(self.n_layers):
        layer_clf = deepcopy(CustomClassifier(obj=layer_clf, **layer_params))

    if preprocessor is None:
        return layer_clf

    return Pipeline(
        [
            ("preprocessor", preprocessor),
            ("classifier", layer_clf),
        ]
    )


def _fit_and_score(self, name, make_estimator, data, rows, predictions):
    """Fit one candidate, score it on the test set and record the results.

    On success the fitted estimator is stored in `self.models_[name]`, a
    metrics dict is appended to `rows` and, when `self.predictions` is
    truthy, the test predictions are stored in `predictions[name]`.
    Any exception skips the candidate; the row is appended only once all
    of its metrics (custom metric included) have been computed, so `rows`
    never holds a partial record.
    """
    X_train, X_test, y_train, y_test = data
    start = time.time()
    try:
        estimator = make_estimator()
        estimator.fit(X_train, y_train)
        self.models_[name] = estimator
        y_pred = estimator.predict(X_test)

        accuracy = accuracy_score(y_test, y_pred, normalize=True)
        b_accuracy = balanced_accuracy_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred, average="weighted")
        try:
            roc_auc = roc_auc_score(y_test, y_pred)
        except Exception as exception:
            roc_auc = None
            if self.ignore_warnings is False:
                print("ROC AUC couldn't be calculated for " + name)
                print(exception)

        row = {
            "Model": name,
            "Accuracy": accuracy,
            "Balanced Accuracy": b_accuracy,
            "ROC AUC": roc_auc,
            "F1 Score": f1,
            "Time Taken": time.time() - start,
        }
        if self.custom_metric is not None:
            custom_metric = self.custom_metric(y_test, y_pred)
            row["Custom metric"] = custom_metric
        rows.append(row)

        if self.verbose > 0:
            report = {
                "Model": name,
                "Accuracy": accuracy,
                "Balanced Accuracy": b_accuracy,
                "ROC AUC": roc_auc,
                "F1 Score": f1,
            }
            if self.custom_metric is not None:
                report[self.custom_metric.__name__] = custom_metric
            report["Time taken"] = time.time() - start
            print(report)

        if self.predictions:
            predictions[name] = y_pred
    except Exception as exception:
        if self.ignore_warnings is False:
            print(name + " model failed to execute")
            print(exception)
Fit classifiers to X_train and y_train, predict and score on X_test, y_test.
Parameters:
X_train: array-like,
Training vectors, where rows is the number of samples
and columns is the number of features.
X_test: array-like,
Testing vectors, where rows is the number of samples
and columns is the number of features.
y_train: array-like,
Training target values (class labels),
one per row of X_train.
y_test: array-like,
Testing target values (class labels),
one per row of X_test.
Returns:
scores: Pandas DataFrame
Returns metrics of all the models in a Pandas DataFrame.
predictions: dict
Only returned when `predictions` is True: maps each model name to its predictions on X_test.
def provide_models(self, X_train, X_test, y_train, y_test):
    """Returns all the model objects trained. If fit hasn't been called yet,
    then it's called to return the models.

    Parameters:

        X_train: array-like,
            Training vectors, where rows is the number of samples
            and columns is the number of features.

        X_test: array-like,
            Testing vectors, where rows is the number of samples
            and columns is the number of features.

        y_train: array-like,
            Training target values, one per row of X_train.

        y_test: array-like,
            Testing target values, one per row of X_test.

    Returns:

        models: dict-object,
            Returns a dictionary with each model's pipeline as value
            and key = name of the model.
    """
    # An empty registry means nothing has been fitted yet: fit lazily.
    if not self.models_:
        self.fit(X_train, X_test, y_train, y_test)

    return self.models_
Returns all the model objects trained. If fit hasn't been called yet, then it's called to return the models.
Parameters:
X_train: array-like, Training vectors, where rows is the number of samples and columns is the number of features.
X_test: array-like, Testing vectors, where rows is the number of samples and columns is the number of features.
y_train: array-like, Training target values, one per row of X_train.
y_test: array-like, Testing target values, one per row of X_test.
Returns:
models: dict-object,
Returns a dictionary with each model's pipeline as value
and key = name of the model.
class LazyDeepRegressor(Custom, RegressorMixin):
    """
    Fitting -- almost -- all the regression algorithms with layers of
    nnetsauce's CustomRegressor and returning their scores.

    Parameters:

        verbose: int, optional (default=0)
            Any positive number for verbosity.

        ignore_warnings: bool, optional (default=True)
            When set to True, warnings related to algorithms that are not
            able to run are ignored.

        custom_metric: function, optional (default=None)
            When function is provided, models are evaluated based on the
            custom evaluation metric provided.

        predictions: bool, optional (default=False)
            When set to True, the predictions of all the models are returned
            alongside the scores, as a dictionary keyed by model name.

        sort_by: string, optional (default='RMSE')
            Sort models by a metric. Available options are 'R-Squared',
            'Adjusted R-Squared', 'RMSE', 'Time Taken' and, when
            custom_metric is provided, 'Custom metric'. 'R-Squared' and
            'Adjusted R-Squared' are sorted in decreasing order, every other
            metric in increasing order.

        random_state: int, optional (default=42)
            Reproducibility seed.

        estimators: list, optional (default='all')
            list of Estimators names or just 'all' (default='all')

        preprocess: bool
            preprocessing is done when set to True

        n_jobs : int, when possible, run in parallel
            For now, only used by individual models that support it.

        n_layers: int, optional (default=3)
            Number of layers of CustomRegressors to be used.

        All the other parameters are the same as CustomRegressor's.

    Attributes:

        models_: dict-object
            Returns a dictionary with each model pipeline as value
            with key as name of models.

        best_model_: object
            Returns the best model pipeline based on the sort_by metric.

    Examples:

        import nnetsauce as ns
        import numpy as np
        from sklearn import datasets
        from sklearn.utils import shuffle

        diabetes = datasets.load_diabetes()
        X, y = shuffle(diabetes.data, diabetes.target, random_state=13)
        X = X.astype(np.float32)

        offset = int(X.shape[0] * 0.9)
        X_train, y_train = X[:offset], y[:offset]
        X_test, y_test = X[offset:], y[offset:]

        reg = ns.LazyDeepRegressor(verbose=0, ignore_warnings=False,
                                   custom_metric=None, predictions=True)
        models, predictions = reg.fit(X_train, X_test, y_train, y_test)
        print(models)

    """

    def __init__(
        self,
        verbose=0,
        ignore_warnings=True,
        custom_metric=None,
        predictions=False,
        sort_by="RMSE",
        random_state=42,
        estimators="all",
        preprocess=False,
        n_jobs=None,
        # Defining depth
        n_layers=3,
        # CustomRegressor attributes
        obj=None,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        col_sample=1,
        row_sample=1,
        seed=123,
        backend="cpu",
    ):
        self.verbose = verbose
        self.ignore_warnings = ignore_warnings
        self.custom_metric = custom_metric
        self.predictions = predictions
        self.sort_by = sort_by
        self.models_ = {}
        self.best_model_ = None
        self.random_state = random_state
        self.estimators = estimators
        self.preprocess = preprocess
        # Stored as the number of wrapping layers added on top of the first
        # CustomRegressor (see the stacking loop in `_make_deep_estimator`).
        self.n_layers = n_layers - 1
        self.n_jobs = n_jobs
        super().__init__(
            obj=obj,
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            col_sample=col_sample,
            row_sample=row_sample,
            seed=seed,
            backend=backend,
        )

    def fit(self, X_train, X_test, y_train, y_test):
        """Fit Regression algorithms to X_train and y_train, predict and score on X_test, y_test.

        Baseline models (random forest and, when available, XGBoost) are
        fitted first, followed by every candidate wrapped in stacked
        CustomRegressor layers. A candidate that raises at any step is
        skipped (and reported when `ignore_warnings` is False).

        Parameters:

            X_train : array-like,
                Training vectors, where rows is the number of samples
                and columns is the number of features.

            X_test : array-like,
                Testing vectors, where rows is the number of samples
                and columns is the number of features.

            y_train : array-like,
                Training target values, one per row of X_train.

            y_test : array-like,
                Testing target values, one per row of X_test.

        Returns:
        -------
        scores: Pandas DataFrame
            Metrics of all the models that could be fitted, indexed by
            model name and sorted by `sort_by`, best model first.

        predictions : dict
            Only returned when `predictions` is True: maps each model name
            to its predictions on X_test.

        """
        if isinstance(X_train, np.ndarray):
            X_train = pd.DataFrame(X_train)
            X_test = pd.DataFrame(X_test)

        numeric_features = X_train.select_dtypes(include=[np.number]).columns
        categorical_features = X_train.select_dtypes(include=["object"]).columns

        categorical_low, categorical_high = get_card_split(
            X_train, categorical_features
        )

        preprocessor = None
        if self.preprocess is True:
            preprocessor = ColumnTransformer(
                transformers=[
                    ("numeric", numeric_transformer, numeric_features),
                    (
                        "categorical_low",
                        categorical_transformer_low,
                        categorical_low,
                    ),
                    (
                        "categorical_high",
                        categorical_transformer_high,
                        categorical_high,
                    ),
                ]
            )

        data = (X_train, X_test, y_train, y_test)
        rows = []  # one dict of metrics per successfully evaluated model
        predictions = {}

        # base models (XGBoost is optional: fall back to the forest alone)
        try:
            baselines = [
                ("RandomForestRegressor", RandomForestRegressor()),
                ("XGBRegressor", xgb.XGBRegressor()),
            ]
        except Exception:
            baselines = [("RandomForestRegressor", RandomForestRegressor())]

        for name, model in baselines:
            self._fit_and_score(
                name, lambda ready=model: ready, data, rows, predictions
            )

        if self.estimators == "all":
            self.regressors = DEEPREGRESSORS
        else:
            self.regressors = [
                ("DeepCustomRegressor(" + est[0] + ")", est[1])
                for est in all_estimators()
                if (issubclass(est[1], RegressorMixin) and (est[0] in self.estimators))
            ]

        for name, model in tqdm(self.regressors):  # do parallel exec
            self._fit_and_score(
                name,
                partial(self._make_deep_estimator, model, preprocessor),
                data,
                rows,
                predictions,
            )

        columns = ["Model", "Adjusted R-Squared", "R-Squared", "RMSE", "Time Taken"]
        if self.custom_metric:
            columns.append("Custom metric")

        scores = pd.DataFrame(rows, columns=columns)
        # R-squared metrics are better when larger; the other columns (errors,
        # time) keep the ascending order so the first row is the best model.
        ascending = self.sort_by not in ("R-Squared", "Adjusted R-Squared")
        scores = scores.sort_values(by=self.sort_by, ascending=ascending).set_index("Model")

        # When every model failed there is no best model to pick; leave
        # `best_model_` untouched instead of raising IndexError.
        if not scores.empty:
            self.best_model_ = self.models_[scores.index[0]]

        if self.predictions is True:
            return scores, predictions

        return scores

    def _layer_params(self):
        """Return the CustomRegressor settings shared by every layer of a stack."""
        return {
            "n_hidden_features": self.n_hidden_features,
            "activation_name": self.activation_name,
            "a": self.a,
            "nodes_sim": self.nodes_sim,
            "bias": self.bias,
            "dropout": self.dropout,
            "direct_link": self.direct_link,
            "n_clusters": self.n_clusters,
            "cluster_encode": self.cluster_encode,
            "type_clust": self.type_clust,
            "type_scaling": self.type_scaling,
            "col_sample": self.col_sample,
            "row_sample": self.row_sample,
            "seed": self.seed,
            "backend": self.backend,
        }

    def _make_deep_estimator(self, model, preprocessor):
        """Build the unfitted stack of CustomRegressor layers around `model`.

        `model` is a zero-argument-callable estimator factory. When
        `preprocessor` is not None the stack is returned as the last step of
        a Pipeline, so that it is only ever fitted on preprocessed data.
        """
        supported = model().get_params()
        model_args = {}
        if "random_state" in supported:
            model_args["random_state"] = self.random_state
        # Forward n_jobs to base learners that accept it.
        if "n_jobs" in supported:
            model_args["n_jobs"] = self.n_jobs

        layer_params = self._layer_params()
        layer_regr = CustomRegressor(obj=model(**model_args), **layer_params)

        for _ in range(self.n_layers):
            layer_regr = deepcopy(CustomRegressor(obj=layer_regr, **layer_params))

        if preprocessor is None:
            return layer_regr

        return Pipeline(
            steps=[
                ("preprocessor", preprocessor),
                ("regressor", layer_regr),
            ]
        )

    def _fit_and_score(self, name, make_estimator, data, rows, predictions):
        """Fit one candidate, score it on the test set and record the results.

        On success the fitted estimator is stored in `self.models_[name]`, a
        metrics dict is appended to `rows` and, when `self.predictions` is
        truthy, the test predictions are stored in `predictions[name]`.
        Any exception skips the candidate; the row is appended only once all
        of its metrics (custom metric included) have been computed, so
        `rows` never holds a partial record.
        """
        X_train, X_test, y_train, y_test = data
        start = time.time()
        try:
            estimator = make_estimator()
            estimator.fit(X_train, y_train)
            self.models_[name] = estimator
            y_pred = estimator.predict(X_test)

            r_squared = r2_score(y_test, y_pred)
            adj_rsquared = adjusted_rsquared(
                r_squared, X_test.shape[0], X_test.shape[1]
            )
            rmse = np.sqrt(np.mean((y_test - y_pred) ** 2))

            row = {
                "Model": name,
                "Adjusted R-Squared": adj_rsquared,
                "R-Squared": r_squared,
                "RMSE": rmse,
                "Time Taken": time.time() - start,
            }
            if self.custom_metric:
                custom_metric = self.custom_metric(y_test, y_pred)
                row["Custom metric"] = custom_metric
            rows.append(row)

            if self.verbose > 0:
                scores_verbose = {
                    "Model": name,
                    "R-Squared": r_squared,
                    "Adjusted R-Squared": adj_rsquared,
                    "RMSE": rmse,
                    "Time taken": time.time() - start,
                }

                if self.custom_metric:
                    scores_verbose[self.custom_metric.__name__] = custom_metric

                print(scores_verbose)

            if self.predictions:
                predictions[name] = y_pred
        except Exception as exception:
            if self.ignore_warnings is False:
                print(name + " model failed to execute")
                print(exception)

    def get_best_model(self):
        """
        This function returns the best model pipeline based on the sort_by metric.

        Returns:

            best_model: object,
                Returns the best model pipeline based on the sort_by metric
                (None when no model has been fitted successfully).

        """
        return self.best_model_

    def provide_models(self, X_train, X_test, y_train, y_test):
        """
        This function returns all the model objects trained in fit function.
        If fit is not called already, then we call fit and then return the models.

        Parameters:

            X_train : array-like,
                Training vectors, where rows is the number of samples
                and columns is the number of features.

            X_test : array-like,
                Testing vectors, where rows is the number of samples
                and columns is the number of features.

            y_train : array-like,
                Training target values, one per row of X_train.

            y_test : array-like,
                Testing target values, one per row of X_test.

        Returns:

            models: dict-object,
                Returns a dictionary with each model pipeline as value
                with key as name of models.

        """
        # An empty registry means nothing has been fitted yet: fit lazily.
        if not self.models_:
            self.fit(X_train, X_test, y_train, y_test)

        return self.models_
Fitting -- almost -- all the regression algorithms with layers of nnetsauce's CustomRegressor and returning their scores.
Parameters:
verbose: int, optional (default=0)
Any positive number for verbosity.
ignore_warnings: bool, optional (default=True)
When set to True, warnings related to algorithms that are not able to run are ignored.
custom_metric: function, optional (default=None)
When function is provided, models are evaluated based on the custom evaluation metric provided.
predictions: bool, optional (default=False)
When set to True, the predictions of all the models are returned alongside the scores, as a dictionary keyed by model name.
sort_by: string, optional (default='RMSE')
Sort models by a metric. Available options are 'R-Squared', 'Adjusted R-Squared', 'RMSE', 'Time Taken'
and, when custom_metric is provided, 'Custom metric' (the label of that column).
random_state: int, optional (default=42)
Reproducibility seed.
estimators: list, optional (default='all')
list of Estimators names or just 'all' (default='all')
preprocess: bool
preprocessing is done when set to True
n_jobs : int, when possible, run in parallel
For now, only used by individual models that support it.
n_layers: int, optional (default=3)
Number of layers of CustomRegressors to be used.
All the other parameters are the same as CustomRegressor's.
Attributes:
models_: dict-object
Returns a dictionary with each model pipeline as value
with key as name of models.
best_model_: object
Returns the best model pipeline based on the sort_by metric.
Examples:
import nnetsauce as ns
import numpy as np
from sklearn import datasets
from sklearn.utils import shuffle
diabetes = datasets.load_diabetes()
X, y = shuffle(diabetes.data, diabetes.target, random_state=13)
X = X.astype(np.float32)
offset = int(X.shape[0] * 0.9)
X_train, y_train = X[:offset], y[:offset]
X_test, y_test = X[offset:], y[offset:]
reg = ns.LazyDeepRegressor(verbose=0, ignore_warnings=False, custom_metric=None, predictions=True)
models, predictions = reg.fit(X_train, X_test, y_train, y_test)
print(models)
def fit(self, X_train, X_test, y_train, y_test):
    """Fit Regression algorithms to X_train and y_train, predict and score on X_test, y_test.

    Two baselines (RandomForestRegressor and, when it can be instantiated,
    XGBRegressor) are fitted first. Then every candidate regressor is wrapped
    in ``1 + self.n_layers`` nested CustomRegressor layers, fitted (inside a
    preprocessing Pipeline when ``self.preprocess`` is True) and scored on
    the test set. A candidate that raises is skipped; the failure is printed
    only when ``self.ignore_warnings`` is False.

    Parameters:

        X_train : array-like,
            Training vectors, where rows is the number of samples
            and columns is the number of features.

        X_test : array-like,
            Testing vectors, where rows is the number of samples
            and columns is the number of features.

        y_train : array-like,
            Target values for the training samples.

        y_test : array-like,
            Target values for the testing samples.

    Returns:
    -------
    scores: Pandas DataFrame
        Returns metrics of all the models in a Pandas DataFrame, indexed by
        model name and sorted (ascending) by ``self.sort_by``.

    predictions : dict
        Test-set predictions keyed by model name. Only returned (as the
        second element of a tuple) when ``self.predictions`` is True.

    """
    R2 = []
    ADJR2 = []
    RMSE = []
    names = []
    TIME = []
    CUSTOM_METRIC = []
    predictions = {}

    if isinstance(X_train, np.ndarray):
        X_train = pd.DataFrame(X_train)
        X_test = pd.DataFrame(X_test)

    numeric_features = X_train.select_dtypes(include=[np.number]).columns
    categorical_features = X_train.select_dtypes(include=["object"]).columns

    categorical_low, categorical_high = get_card_split(
        X_train, categorical_features
    )

    if self.preprocess is True:
        preprocessor = ColumnTransformer(
            transformers=[
                ("numeric", numeric_transformer, numeric_features),
                (
                    "categorical_low",
                    categorical_transformer_low,
                    categorical_low,
                ),
                (
                    "categorical_high",
                    categorical_transformer_high,
                    categorical_high,
                ),
            ]
        )

    def _wrap(obj):
        # One CustomRegressor layer around `obj`, configured from this instance.
        return CustomRegressor(
            obj=obj,
            n_hidden_features=self.n_hidden_features,
            activation_name=self.activation_name,
            a=self.a,
            nodes_sim=self.nodes_sim,
            bias=self.bias,
            dropout=self.dropout,
            direct_link=self.direct_link,
            n_clusters=self.n_clusters,
            cluster_encode=self.cluster_encode,
            type_clust=self.type_clust,
            type_scaling=self.type_scaling,
            col_sample=self.col_sample,
            row_sample=self.row_sample,
            seed=self.seed,
            backend=self.backend,
        )

    def _stack(model):
        # Unfitted stack: base estimator + 1 layer + self.n_layers extra layers.
        if "random_state" in model().get_params().keys():
            base = model(random_state=self.random_state)
        else:
            base = model()
        layer_regr = _wrap(base)
        for _ in range(self.n_layers):
            layer_regr = deepcopy(_wrap(layer_regr))
        return layer_regr

    def _score(name, fitted, start):
        # Register a fitted model, score it on the test set, record results.
        self.models_[name] = fitted
        y_pred = fitted.predict(X_test)
        r_squared = r2_score(y_test, y_pred)
        adj_rsquared = adjusted_rsquared(
            r_squared, X_test.shape[0], X_test.shape[1]
        )
        rmse = np.sqrt(np.mean((y_test - y_pred) ** 2))
        elapsed = time.time() - start

        # Compute every metric BEFORE touching the result lists: if the
        # user-supplied metric raises, no list is extended, so all columns
        # keep the same length and the final DataFrame can still be built.
        custom_metric = None
        if self.custom_metric:
            custom_metric = self.custom_metric(y_test, y_pred)

        names.append(name)
        R2.append(r_squared)
        ADJR2.append(adj_rsquared)
        RMSE.append(rmse)
        TIME.append(elapsed)
        if self.custom_metric:
            CUSTOM_METRIC.append(custom_metric)

        if self.verbose > 0:
            scores_verbose = {
                "Model": name,
                "R-Squared": r_squared,
                "Adjusted R-Squared": adj_rsquared,
                "RMSE": rmse,
                "Time taken": time.time() - start,
            }
            if self.custom_metric:
                scores_verbose[self.custom_metric.__name__] = custom_metric
            print(scores_verbose)

        if self.predictions:
            predictions[name] = y_pred

    def _report_failure(name, exception):
        if self.ignore_warnings is False:
            print(name + " model failed to execute")
            print(exception)

    # base models
    # NOTE(review): the fallback presumably covers an unavailable xgboost;
    # any exception while instantiating the baselines triggers it.
    try:
        baseline_names = ["RandomForestRegressor", "XGBRegressor"]
        baseline_models = [RandomForestRegressor(), xgb.XGBRegressor()]
    except Exception:
        baseline_names = ["RandomForestRegressor"]
        baseline_models = [RandomForestRegressor()]

    for name, model in zip(baseline_names, baseline_models):
        start = time.time()
        try:
            model.fit(X_train, y_train)
            _score(name, model, start)
        except Exception as exception:
            _report_failure(name, exception)

    if self.estimators == "all":
        self.regressors = DEEPREGRESSORS
    else:
        self.regressors = [
            ("DeepCustomRegressor(" + est[0] + ")", est[1])
            for est in all_estimators()
            if (issubclass(est[1], RegressorMixin) and (est[0] in self.estimators))
        ]

    for name, model in tqdm(self.regressors):  # do parallel exec
        start = time.time()
        try:
            layer_regr = _stack(model)

            if self.preprocess is True:
                # The stack is fitted only through the pipeline, i.e. on
                # preprocessed features. Fitting it on the raw frame first
                # doubled the training cost and made models fail on inputs
                # that only the preprocessor can handle.
                fitted = Pipeline(
                    steps=[
                        ("preprocessor", preprocessor),
                        ("regressor", layer_regr),
                    ]
                )
            else:
                fitted = layer_regr

            # Single fit of the complete stack (nested layers included).
            fitted.fit(X_train, y_train)
            _score(name, fitted, start)
        except Exception as exception:
            _report_failure(name, exception)

    scores = {
        "Model": names,
        "Adjusted R-Squared": ADJR2,
        "R-Squared": R2,
        "RMSE": RMSE,
        "Time Taken": TIME,
    }

    if self.custom_metric:
        scores["Custom metric"] = CUSTOM_METRIC

    scores = pd.DataFrame(scores)
    # NOTE(review): ascending order puts the lowest value first, which is the
    # best row for 'RMSE' but the worst one for 'R-Squared' and
    # 'Adjusted R-Squared' -- confirm this is intended for those metrics.
    scores = scores.sort_values(by=self.sort_by, ascending=True).set_index("Model")

    # Guard against the case where every model failed (empty score table).
    self.best_model_ = self.models_[scores.index[0]] if len(scores.index) > 0 else None

    if self.predictions is True:
        return scores, predictions

    return scores
Fit Regression algorithms to X_train and y_train, predict and score on X_test, y_test.
Parameters:
X_train : array-like,
Training vectors, where rows is the number of samples
and columns is the number of features.
X_test : array-like,
Testing vectors, where rows is the number of samples
and columns is the number of features.
y_train : array-like,
Target values for the training samples
(one entry per row of X_train).
y_test : array-like,
Target values for the testing samples
(one entry per row of X_test).
Returns:
scores: Pandas DataFrame Returns metrics of all the models in a Pandas DataFrame.
predictions : dict Returns the test-set predictions of all the models, keyed by model name (only returned when predictions=True).
def provide_models(self, X_train, X_test, y_train, y_test):
    """
    This function returns all the model objects trained in fit function.
    If fit is not called already, then we call fit and then return the models.

    Parameters:

        X_train : array-like,
            Training vectors, where rows is the number of samples
            and columns is the number of features.

        X_test : array-like,
            Testing vectors, where rows is the number of samples
            and columns is the number of features.

        y_train : array-like,
            Target values for the training samples.

        y_test : array-like,
            Target values for the testing samples.

    Returns:

        models: dict-object,
            Returns a dictionary with each model pipeline as value
            with key as name of models.

    """
    # An empty registry means nothing has been trained yet: train on demand.
    # The data arguments are only used in that case.
    if not self.models_:
        self.fit(X_train, X_test, y_train, y_test)

    return self.models_
This function returns all the model objects trained in fit function. If fit is not called already, then we call fit and then return the models.
Parameters:
X_train : array-like,
Training vectors, where rows is the number of samples
and columns is the number of features.
X_test : array-like,
Testing vectors, where rows is the number of samples
and columns is the number of features.
y_train : array-like,
Target values for the training samples
(one entry per row of X_train).
y_test : array-like,
Target values for the testing samples
(one entry per row of X_test).
Returns:
models: dict-object,
Returns a dictionary with each model pipeline as value
with key as name of models.
class LazyMTS(LazyDeepMTS):
    """
    Fitting -- almost -- all the regression algorithms to multivariate time series
    and returning their scores (no layers).

    This is LazyDeepMTS with `n_layers` fixed to 1: the constructor forwards
    every argument unchanged to the parent class.

    Parameters:

        verbose: int, optional (default=0)
            Any positive number for verbosity.

        ignore_warnings: bool, optional (default=True)
            When set to True, the warnings related to algorithms that are not
            able to run are ignored.

        custom_metric: function, optional (default=None)
            When function is provided, models are evaluated based on the custom
            evaluation metric provided.

        predictions: bool, optional (default=False)
            When set to True, the predictions of all the models are returned as dataframe.

        sort_by: string, optional (default=None)
            Sort models by a metric. Available options are 'RMSE', 'MAE', 'MPL', 'MPE', 'MAPE',
            'R-Squared', 'Adjusted R-Squared' or a custom metric identified by its name and
            provided by custom_metric. When left to None, the parent class
            (LazyDeepMTS) uses 'WINKLERSCORE' if `replications` is not None or
            `type_pi` is 'gaussian', and 'RMSE' otherwise.

        random_state: int, optional (default=42)
            Reproducibility seed.

        estimators: list, optional (default='all')
            list of Estimators (regression algorithms) names or just 'all' (default='all')

        preprocess: bool, optional (default=False)
            preprocessing is done when set to True

        h: int, optional (default=None)
            Number of steps ahead to predict (when used, must be > 0 and < X_test.shape[0]).

        All the other parameters are the same as MTS's.

    Attributes:

        models_: dict-object
            Returns a dictionary with each model pipeline as value
            with key as name of models.

        best_model_: object
            Returns the best model pipeline based on the sort_by metric.

    Examples:

        See https://thierrymoudiki.github.io/blog/2023/10/29/python/quasirandomizednn/MTS-LazyPredict

    """

    def __init__(
        self,
        verbose=0,
        ignore_warnings=True,
        custom_metric=None,
        predictions=False,
        sort_by=None,  # leave it as is
        random_state=42,
        estimators="all",
        preprocess=False,
        h=None,
        # MTS attributes
        obj=None,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        lags=15,
        type_pi="scp2-kde",
        block_size=None,
        replications=None,
        kernel=None,
        agg="mean",
        seed=123,
        backend="cpu",
        show_progress=False,
    ):
        # Forward everything to LazyDeepMTS; the only value not taken from the
        # caller is n_layers, pinned to 1 (the "no layers" variant).
        super().__init__(
            verbose=verbose,
            ignore_warnings=ignore_warnings,
            custom_metric=custom_metric,
            predictions=predictions,
            sort_by=sort_by,
            random_state=random_state,
            estimators=estimators,
            preprocess=preprocess,
            n_layers=1,
            h=h,
            obj=obj,
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            lags=lags,
            type_pi=type_pi,
            block_size=block_size,
            replications=replications,
            kernel=kernel,
            agg=agg,
            seed=seed,
            backend=backend,
            show_progress=show_progress,
        )
Fitting -- almost -- all the regression algorithms to multivariate time series and returning their scores (no layers).
Parameters:
verbose: int, optional (default=0)
Any positive number for verbosity.
ignore_warnings: bool, optional (default=True)
When set to True, the warnings related to algorithms that are not
able to run are ignored.
custom_metric: function, optional (default=None)
When function is provided, models are evaluated based on the custom
evaluation metric provided.
predictions: bool, optional (default=False)
When set to True, the predictions of all the models are returned as dataframe.
sort_by: string, optional (default='RMSE')
Sort models by a metric. Available options are 'RMSE', 'MAE', 'MPL', 'MPE', 'MAPE',
'R-Squared', 'Adjusted R-Squared' or a custom metric identified by its name and
provided by custom_metric.
random_state: int, optional (default=42)
Reproducibility seed.
estimators: list, optional (default='all')
list of Estimators (regression algorithms) names or just 'all' (default='all')
preprocess: bool, preprocessing is done when set to True
h: int, optional (default=None)
Number of steps ahead to predict (when used, must be > 0 and < X_test.shape[0]).
All the other parameters are the same as MTS's.
Attributes:
models_: dict-object
Returns a dictionary with each model pipeline as value
with key as name of models.
best_model_: object
Returns the best model pipeline based on the sort_by metric.
Examples:
See https://thierrymoudiki.github.io/blog/2023/10/29/python/quasirandomizednn/MTS-LazyPredict
104class LazyDeepMTS(MTS): 105 """ 106 107 Fitting -- almost -- all the regression algorithms with layers of 108 nnetsauce's CustomRegressor to multivariate time series 109 and returning their scores. 110 111 Parameters: 112 113 verbose: int, optional (default=0) 114 Any positive number for verbosity. 115 116 ignore_warnings: bool, optional (default=True) 117 When set to True, the warning related to algorigms that are not 118 able to run are ignored. 119 120 custom_metric: function, optional (default=None) 121 When function is provided, models are evaluated based on the custom 122 evaluation metric provided. 123 124 predictions: bool, optional (default=False) 125 When set to True, the predictions of all the models models are returned as dataframe. 126 127 sort_by: string, optional (default='RMSE') 128 Sort models by a metric. Available options are 'RMSE', 'MAE', 'MPL', 'MPE', 'MAPE', 129 'R-Squared', 'Adjusted R-Squared' or a custom metric identified by its name and 130 provided by custom_metric. 131 132 random_state: int, optional (default=42) 133 Reproducibiility seed. 134 135 estimators: list, optional (default='all') 136 list of Estimators (regression algorithms) names or just 'all' (default='all') 137 138 preprocess: bool, preprocessing is done when set to True 139 140 n_layers: int, optional (default=1) 141 Number of layers in the network. When set to 1, the model is equivalent to a MTS. 142 143 h: int, optional (default=None) 144 Number of steps ahead to predict (when used, must be > 0 and < X_test.shape[0]). 145 146 All the other parameters are the same as MTS's. 147 148 Attributes: 149 150 models_: dict-object 151 Returns a dictionary with each model pipeline as value 152 with key as name of models. 153 154 best_model_: object 155 Returns the best model pipeline based on the sort_by metric. 
156 157 Examples: 158 159 See https://thierrymoudiki.github.io/blog/2023/10/29/python/quasirandomizednn/MTS-LazyPredict 160 161 """ 162 163 def __init__( 164 self, 165 verbose=0, 166 ignore_warnings=True, 167 custom_metric=None, 168 predictions=False, 169 sort_by=None, # leave it as is 170 random_state=42, 171 estimators="all", 172 preprocess=False, 173 n_layers=1, 174 h=None, 175 # MTS attributes 176 obj=None, 177 n_hidden_features=5, 178 activation_name="relu", 179 a=0.01, 180 nodes_sim="sobol", 181 bias=True, 182 dropout=0, 183 direct_link=True, 184 n_clusters=2, 185 cluster_encode=True, 186 type_clust="kmeans", 187 type_scaling=("std", "std", "std"), 188 lags=15, 189 type_pi="scp2-kde", 190 block_size=None, 191 replications=None, 192 kernel=None, 193 agg="mean", 194 seed=123, 195 backend="cpu", 196 show_progress=False, 197 ): 198 self.verbose = verbose 199 self.ignore_warnings = ignore_warnings 200 self.custom_metric = custom_metric 201 self.predictions = predictions 202 self.sort_by = sort_by 203 self.models_ = {} 204 self.best_model_ = None 205 self.random_state = random_state 206 self.estimators = estimators 207 self.preprocess = preprocess 208 self.n_layers = n_layers 209 self.h = h 210 super().__init__( 211 obj=obj, 212 n_hidden_features=n_hidden_features, 213 activation_name=activation_name, 214 a=a, 215 nodes_sim=nodes_sim, 216 bias=bias, 217 dropout=dropout, 218 direct_link=direct_link, 219 n_clusters=n_clusters, 220 cluster_encode=cluster_encode, 221 type_clust=type_clust, 222 type_scaling=type_scaling, 223 seed=seed, 224 backend=backend, 225 lags=lags, 226 type_pi=type_pi, 227 block_size=block_size, 228 replications=replications, 229 kernel=kernel, 230 agg=agg, 231 verbose=verbose, 232 show_progress=show_progress, 233 ) 234 if self.replications is not None or self.type_pi == "gaussian": 235 if self.sort_by is None: 236 self.sort_by = "WINKLERSCORE" 237 else: 238 if self.sort_by is None: 239 self.sort_by = "RMSE" 240 241 def fit(self, X_train, X_test, 
xreg=None, per_series=False, **kwargs): 242 """Fit Regression algorithms to X_train, predict and score on X_test. 243 244 Parameters: 245 246 X_train: array-like or data frame, 247 Training vectors, where rows is the number of samples 248 and columns is the number of features. 249 250 X_test: array-like or data frame, 251 Testing vectors, where rows is the number of samples 252 and columns is the number of features. 253 254 xreg: array-like, optional (default=None) 255 Additional (external) regressors to be passed to self.obj 256 xreg must be in 'increasing' order (most recent observations last) 257 258 per_series: bool, optional (default=False) 259 When set to True, the metrics are computed series by series. 260 261 **kwargs: dict, optional (default=None) 262 Additional parameters to be passed to `fit` method of `obj`. 263 264 Returns: 265 266 scores: Pandas DataFrame 267 Returns metrics of all the models in a Pandas DataFrame. 268 269 predictions: Pandas DataFrame 270 Returns predictions of all the models in a Pandas DataFrame. 271 272 """ 273 R2 = [] 274 ADJR2 = [] 275 ME = [] 276 MPL = [] 277 RMSE = [] 278 MAE = [] 279 MPE = [] 280 MAPE = [] 281 WINKLERSCORE = [] 282 COVERAGE = [] 283 284 # WIN = [] 285 names = [] 286 TIME = [] 287 predictions = {} 288 289 if self.custom_metric is not None: 290 CUSTOM_METRIC = [] 291 292 if self.h is None: 293 assert X_test is not None, "If h is None, X_test must be provided." 
294 295 if isinstance(X_train, np.ndarray): 296 X_train = pd.DataFrame(X_train) 297 X_test = pd.DataFrame(X_test) 298 299 self.series_names = X_train.columns.tolist() 300 301 X_train = convert_df_to_numeric(X_train) 302 X_test = convert_df_to_numeric(X_test) 303 304 numeric_features = X_train.select_dtypes(include=[np.number]).columns 305 categorical_features = X_train.select_dtypes(include=["object"]).columns 306 307 categorical_low, categorical_high = get_card_split( 308 X_train, categorical_features 309 ) 310 311 if self.preprocess: 312 preprocessor = ColumnTransformer( 313 transformers=[ 314 ("numeric", numeric_transformer, numeric_features), 315 ( 316 "categorical_low", 317 categorical_transformer_low, 318 categorical_low, 319 ), 320 ( 321 "categorical_high", 322 categorical_transformer_high, 323 categorical_high, 324 ), 325 ] 326 ) 327 328 # baselines (Classical MTS) ---- 329 for i, name in enumerate(["ARIMA", "ETS", "Theta", "VAR", "VECM"]): 330 try: 331 start = time.time() 332 regr = ClassicalMTS(model=name) 333 regr.fit(X_train, **kwargs) 334 self.models_[name] = regr 335 if self.h is None: 336 X_pred = regr.predict(h=X_test.shape[0], **kwargs) 337 else: 338 assert self.h > 0, "h must be > 0" 339 X_pred = regr.predict(h=self.h, **kwargs) 340 try: 341 X_test = X_test[0 : self.h, :] 342 except Exception as e: 343 X_test = X_test.iloc[0 : self.h, :] 344 345 rmse = mean_errors( 346 actual=X_test, 347 pred=X_pred, 348 scoring="root_mean_squared_error", 349 per_series=per_series, 350 ) 351 mae = mean_errors( 352 actual=X_test, 353 pred=X_pred, 354 scoring="mean_absolute_error", 355 per_series=per_series, 356 ) 357 mpl = mean_errors( 358 actual=X_test, 359 pred=X_pred, 360 scoring="mean_pinball_loss", 361 per_series=per_series, 362 ) 363 except Exception: 364 365 continue 366 367 names.append(name) 368 RMSE.append(rmse) 369 MAE.append(mae) 370 MPL.append(mpl) 371 372 if self.custom_metric is not None: 373 try: 374 if self.h is None: 375 custom_metric = 
self.custom_metric(X_test, X_pred) 376 else: 377 custom_metric = self.custom_metric(X_test_h, X_pred) 378 CUSTOM_METRIC.append(custom_metric) 379 except Exception as e: 380 custom_metric = np.iinfo(np.float32).max 381 CUSTOM_METRIC.append(np.iinfo(np.float32).max) 382 383 if (self.replications is not None) or (self.type_pi == "gaussian"): 384 if per_series == False: 385 winklerscore = winkler_score(obj=X_pred, actual=X_test, level=95) 386 coveragecalc = coverage(X_pred, X_test, level=95) 387 else: 388 winklerscore = winkler_score( 389 obj=X_pred, actual=X_test, level=95, per_series=True 390 ) 391 coveragecalc = coverage(X_pred, X_test, level=95, per_series=True) 392 WINKLERSCORE.append(winklerscore) 393 COVERAGE.append(coveragecalc) 394 TIME.append(time.time() - start) 395 396 if self.estimators == "all": 397 if self.n_layers <= 1: 398 self.regressors = REGRESSORSMTS 399 else: 400 self.regressors = DEEPREGRESSORSMTS 401 else: 402 if self.n_layers <= 1: 403 self.regressors = [ 404 ("MTS(" + est[0] + ")", est[1]) 405 for est in all_estimators() 406 if ( 407 issubclass(est[1], RegressorMixin) 408 and (est[0] in self.estimators) 409 ) 410 ] 411 else: # self.n_layers > 1 412 self.regressors = [ 413 ("DeepMTS(" + est[0] + ")", est[1]) 414 for est in all_estimators() 415 if ( 416 issubclass(est[1], RegressorMixin) 417 and (est[0] in self.estimators) 418 ) 419 ] 420 421 if self.preprocess is True: 422 for name, model in tqdm(self.regressors): # do parallel exec 423 start = time.time() 424 try: 425 if "random_state" in model().get_params().keys(): 426 pipe = Pipeline( 427 steps=[ 428 ("preprocessor", preprocessor), 429 ( 430 "regressor", 431 DeepMTS( 432 obj=model( 433 random_state=self.random_state, 434 **kwargs, 435 ), 436 n_layers=self.n_layers, 437 n_hidden_features=self.n_hidden_features, 438 activation_name=self.activation_name, 439 a=self.a, 440 nodes_sim=self.nodes_sim, 441 bias=self.bias, 442 dropout=self.dropout, 443 direct_link=self.direct_link, 444 
n_clusters=self.n_clusters, 445 cluster_encode=self.cluster_encode, 446 type_clust=self.type_clust, 447 type_scaling=self.type_scaling, 448 lags=self.lags, 449 type_pi=self.type_pi, 450 block_size=self.block_size, 451 replications=self.replications, 452 kernel=self.kernel, 453 agg=self.agg, 454 seed=self.seed, 455 backend=self.backend, 456 show_progress=self.show_progress, 457 ), 458 ), 459 ] 460 ) 461 else: # "random_state" in model().get_params().keys() 462 pipe = Pipeline( 463 steps=[ 464 ("preprocessor", preprocessor), 465 ( 466 "regressor", 467 DeepMTS( 468 obj=model(**kwargs), 469 n_layers=self.n_layers, 470 n_hidden_features=self.n_hidden_features, 471 activation_name=self.activation_name, 472 a=self.a, 473 nodes_sim=self.nodes_sim, 474 bias=self.bias, 475 dropout=self.dropout, 476 direct_link=self.direct_link, 477 n_clusters=self.n_clusters, 478 cluster_encode=self.cluster_encode, 479 type_clust=self.type_clust, 480 type_scaling=self.type_scaling, 481 lags=self.lags, 482 type_pi=self.type_pi, 483 block_size=self.block_size, 484 replications=self.replications, 485 kernel=self.kernel, 486 agg=self.agg, 487 seed=self.seed, 488 backend=self.backend, 489 show_progress=self.show_progress, 490 ), 491 ), 492 ] 493 ) 494 495 pipe.fit(X_train, **kwargs) 496 # pipe.fit(X_train, xreg=xreg) 497 498 self.models_[name] = pipe 499 500 if self.h is None: 501 X_pred = pipe["regressor"].predict(h=self.h, **kwargs) 502 else: 503 assert self.h > 0, "h must be > 0" 504 X_pred = pipe["regressor"].predict(h=self.h, **kwargs) 505 506 if (self.replications is not None) or (self.type_pi == "gaussian"): 507 rmse = mean_errors( 508 actual=X_test, 509 pred=X_pred, 510 scoring="root_mean_squared_error", 511 per_series=per_series, 512 ) 513 mae = mean_errors( 514 actual=X_test, 515 pred=X_pred, 516 scoring="mean_absolute_error", 517 per_series=per_series, 518 ) 519 mpl = mean_errors( 520 actual=X_test, 521 pred=X_pred, 522 scoring="mean_pinball_loss", 523 per_series=per_series, 524 ) 525 
winklerscore = winkler_score( 526 obj=X_pred, 527 actual=X_test, 528 level=95, 529 per_series=per_series, 530 ) 531 coveragecalc = coverage( 532 X_pred, X_test, level=95, per_series=per_series 533 ) 534 else: 535 rmse = mean_errors( 536 actual=X_test, 537 pred=X_pred, 538 scoring="root_mean_squared_error", 539 per_series=per_series, 540 ) 541 mae = mean_errors( 542 actual=X_test, 543 pred=X_pred, 544 scoring="mean_absolute_error", 545 per_series=per_series, 546 ) 547 mpl = mean_errors( 548 actual=X_test, 549 pred=X_pred, 550 scoring="mean_pinball_loss", 551 per_series=per_series, 552 ) 553 554 names.append(name) 555 RMSE.append(rmse) 556 MAE.append(mae) 557 MPL.append(mpl) 558 559 if (self.replications is not None) or (self.type_pi == "gaussian"): 560 WINKLERSCORE.append(winklerscore) 561 COVERAGE.append(coveragecalc) 562 TIME.append(time.time() - start) 563 564 if self.custom_metric is not None: 565 try: 566 custom_metric = self.custom_metric(X_test, X_pred) 567 CUSTOM_METRIC.append(custom_metric) 568 except Exception as e: 569 custom_metric = np.iinfo(np.float32).max 570 CUSTOM_METRIC.append(custom_metric) 571 572 if self.verbose > 0: 573 if (self.replications is not None) or ( 574 self.type_pi == "gaussian" 575 ): 576 scores_verbose = { 577 "Model": name, 578 "RMSE": rmse, 579 "MAE": mae, 580 "MPL": mpl, 581 "WINKLERSCORE": winklerscore, 582 "COVERAGE": coveragecalc, 583 "Time taken": time.time() - start, 584 } 585 else: 586 scores_verbose = { 587 "Model": name, 588 "RMSE": rmse, 589 "MAE": mae, 590 "MPL": mpl, 591 "Time taken": time.time() - start, 592 } 593 594 if self.custom_metric is not None: 595 scores_verbose["Custom metric"] = custom_metric 596 597 if self.predictions: 598 predictions[name] = X_pred 599 except Exception as exception: 600 if self.ignore_warnings is False: 601 print(name + " model failed to execute") 602 print(exception) 603 604 else: # no preprocessing 605 606 for name, model in tqdm(self.regressors): # do parallel exec 607 start = 
time.time() 608 try: 609 if "random_state" in model().get_params().keys(): 610 pipe = DeepMTS( 611 obj=model(random_state=self.random_state, **kwargs), 612 n_layers=self.n_layers, 613 n_hidden_features=self.n_hidden_features, 614 activation_name=self.activation_name, 615 a=self.a, 616 nodes_sim=self.nodes_sim, 617 bias=self.bias, 618 dropout=self.dropout, 619 direct_link=self.direct_link, 620 n_clusters=self.n_clusters, 621 cluster_encode=self.cluster_encode, 622 type_clust=self.type_clust, 623 type_scaling=self.type_scaling, 624 lags=self.lags, 625 type_pi=self.type_pi, 626 block_size=self.block_size, 627 replications=self.replications, 628 kernel=self.kernel, 629 agg=self.agg, 630 seed=self.seed, 631 backend=self.backend, 632 show_progress=self.show_progress, 633 ) 634 else: 635 pipe = DeepMTS( 636 obj=model(**kwargs), 637 n_layers=self.n_layers, 638 n_hidden_features=self.n_hidden_features, 639 activation_name=self.activation_name, 640 a=self.a, 641 nodes_sim=self.nodes_sim, 642 bias=self.bias, 643 dropout=self.dropout, 644 direct_link=self.direct_link, 645 n_clusters=self.n_clusters, 646 cluster_encode=self.cluster_encode, 647 type_clust=self.type_clust, 648 type_scaling=self.type_scaling, 649 lags=self.lags, 650 type_pi=self.type_pi, 651 block_size=self.block_size, 652 replications=self.replications, 653 kernel=self.kernel, 654 agg=self.agg, 655 seed=self.seed, 656 backend=self.backend, 657 show_progress=self.show_progress, 658 ) 659 660 pipe.fit(X_train, xreg, **kwargs) 661 # pipe.fit(X_train, xreg=xreg) # DO xreg like in `ahead` 662 663 self.models_[name] = pipe 664 665 if self.preprocess is True: 666 if self.h is None: 667 X_pred = pipe["regressor"].predict( 668 h=X_test.shape[0], **kwargs 669 ) 670 else: 671 assert ( 672 self.h > 0 and self.h <= X_test.shape[0] 673 ), "h must be > 0 and < X_test.shape[0]" 674 X_pred = pipe["regressor"].predict(h=self.h, **kwargs) 675 676 else: 677 678 if self.h is None: 679 X_pred = pipe.predict( 680 h=X_test.shape[0], 681 
**kwargs, 682 # X_pred = pipe.predict(h=X_test.shape[0], new_xreg=new_xreg) ## DO xreg like in `ahead` 683 ) 684 else: 685 assert ( 686 self.h > 0 and self.h <= X_test.shape[0] 687 ), "h must be > 0 and < X_test.shape[0]" 688 X_pred = pipe.predict(h=self.h, **kwargs) 689 690 if self.h is None: 691 if (self.replications is not None) or ( 692 self.type_pi == "gaussian" 693 ): 694 rmse = mean_errors( 695 actual=X_test, 696 pred=X_pred.mean, 697 scoring="root_mean_squared_error", 698 per_series=per_series, 699 ) 700 mae = mean_errors( 701 actual=X_test, 702 pred=X_pred.mean, 703 scoring="mean_absolute_error", 704 per_series=per_series, 705 ) 706 mpl = mean_errors( 707 actual=X_test, 708 pred=X_pred.mean, 709 scoring="mean_pinball_loss", 710 per_series=per_series, 711 ) 712 winklerscore = winkler_score( 713 obj=X_pred, 714 actual=X_test, 715 level=95, 716 per_series=per_series, 717 ) 718 coveragecalc = coverage( 719 X_pred, X_test, level=95, per_series=per_series 720 ) 721 else: # no prediction interval 722 rmse = mean_errors( 723 actual=X_test, 724 pred=X_pred, 725 scoring="root_mean_squared_error", 726 per_series=per_series, 727 ) 728 mae = mean_errors( 729 actual=X_test, 730 pred=X_pred, 731 scoring="mean_absolute_error", 732 per_series=per_series, 733 ) 734 mpl = mean_errors( 735 actual=X_test, 736 pred=X_pred, 737 scoring="mean_pinball_loss", 738 per_series=per_series, 739 ) 740 else: # self.h is not None 741 if (self.replications is not None) or ( 742 self.type_pi == "gaussian" 743 ): 744 745 if isinstance(X_test, pd.DataFrame): 746 X_test_h = X_test.iloc[0 : self.h, :] 747 rmse = mean_errors( 748 actual=X_test_h, 749 pred=X_pred, 750 scoring="root_mean_squared_error", 751 per_series=per_series, 752 ) 753 mae = mean_errors( 754 actual=X_test_h, 755 pred=X_pred, 756 scoring="mean_absolute_error", 757 per_series=per_series, 758 ) 759 mpl = mean_errors( 760 actual=X_test_h, 761 pred=X_pred, 762 scoring="mean_pinball_loss", 763 per_series=per_series, 764 ) 765 
winklerscore = winkler_score( 766 obj=X_pred, 767 actual=X_test_h, 768 level=95, 769 per_series=per_series, 770 ) 771 coveragecalc = coverage( 772 X_pred, 773 X_test_h, 774 level=95, 775 per_series=per_series, 776 ) 777 else: 778 X_test_h = X_test[0 : self.h, :] 779 rmse = mean_errors( 780 actual=X_test_h, 781 pred=X_pred, 782 scoring="root_mean_squared_error", 783 per_series=per_series, 784 ) 785 mae = mean_errors( 786 actual=X_test_h, 787 pred=X_pred, 788 scoring="mean_absolute_error", 789 per_series=per_series, 790 ) 791 mpl = mean_errors( 792 actual=X_test_h, 793 pred=X_pred, 794 scoring="mean_pinball_loss", 795 per_series=per_series, 796 ) 797 winklerscore = winkler_score( 798 obj=X_pred, 799 actual=X_test_h, 800 level=95, 801 per_series=per_series, 802 ) 803 coveragecalc = coverage( 804 X_pred, 805 X_test_h, 806 level=95, 807 per_series=per_series, 808 ) 809 else: # no prediction interval 810 811 if isinstance(X_test, pd.DataFrame): 812 X_test_h = X_test.iloc[0 : self.h, :] 813 rmse = mean_errors( 814 actual=X_test_h, 815 pred=X_pred, 816 scoring="root_mean_squared_error", 817 per_series=per_series, 818 ) 819 mae = mean_errors( 820 actual=X_test_h, 821 pred=X_pred, 822 scoring="mean_absolute_error", 823 per_series=per_series, 824 ) 825 mpl = mean_errors( 826 actual=X_test_h, 827 pred=X_pred, 828 scoring="mean_pinball_loss", 829 per_series=per_series, 830 ) 831 else: 832 X_test_h = X_test[0 : self.h, :] 833 rmse = mean_errors( 834 actual=X_test_h, 835 pred=X_pred, 836 scoring="root_mean_squared_error", 837 per_series=per_series, 838 ) 839 mae = mean_errors( 840 actual=X_test_h, 841 pred=X_pred, 842 scoring="mean_absolute_error", 843 per_series=per_series, 844 ) 845 846 names.append(name) 847 RMSE.append(rmse) 848 MAE.append(mae) 849 MPL.append(mpl) 850 if (self.replications is not None) or (self.type_pi == "gaussian"): 851 WINKLERSCORE.append(winklerscore) 852 COVERAGE.append(coveragecalc) 853 TIME.append(time.time() - start) 854 855 if self.custom_metric is 
not None: 856 try: 857 if self.h is None: 858 custom_metric = self.custom_metric(X_test, X_pred) 859 else: 860 custom_metric = self.custom_metric(X_test_h, X_pred) 861 CUSTOM_METRIC.append(custom_metric) 862 except Exception as e: 863 custom_metric = np.iinfo(np.float32).max 864 CUSTOM_METRIC.append(np.iinfo(np.float32).max) 865 866 if self.verbose > 0: 867 if (self.replications is not None) or ( 868 self.type_pi == "gaussian" 869 ): 870 scores_verbose = { 871 "Model": name, 872 "RMSE": rmse, 873 "MAE": mae, 874 "MPL": mpl, 875 "WINKLERSCORE": winklerscore, 876 "COVERAGE": coveragecalc, 877 "Time taken": time.time() - start, 878 } 879 else: 880 scores_verbose = { 881 "Model": name, 882 "RMSE": rmse, 883 "MAE": mae, 884 "MPL": mpl, 885 "Time taken": time.time() - start, 886 } 887 888 if self.custom_metric is not None: 889 scores_verbose["Custom metric"] = custom_metric 890 891 if self.predictions: 892 predictions[name] = X_pred 893 894 except Exception as exception: 895 if self.ignore_warnings is False: 896 print(name + " model failed to execute") 897 print(exception) 898 899 if (self.replications is not None) or (self.type_pi == "gaussian"): 900 scores = { 901 "Model": names, 902 "RMSE": RMSE, 903 "MAE": MAE, 904 "MPL": MPL, 905 "WINKLERSCORE": WINKLERSCORE, 906 "COVERAGE": COVERAGE, 907 "Time Taken": TIME, 908 } 909 else: 910 scores = { 911 "Model": names, 912 "RMSE": RMSE, 913 "MAE": MAE, 914 "MPL": MPL, 915 "Time Taken": TIME, 916 } 917 918 if self.custom_metric is not None: 919 scores["Custom metric"] = CUSTOM_METRIC 920 921 if per_series: 922 scores = dict_to_dataframe_series(scores, self.series_names) 923 else: 924 scores = pd.DataFrame(scores) 925 926 try: # case per_series, can't be sorted 927 scores = scores.sort_values(by=self.sort_by, ascending=True).set_index( 928 "Model" 929 ) 930 931 self.best_model_ = self.models_[scores.index[0]] 932 except Exception as e: 933 pass 934 935 if self.predictions is True: 936 937 return scores, predictions 938 939 
return scores 940 941 def get_best_model(self): 942 """ 943 This function returns the best model pipeline based on the sort_by metric. 944 945 Returns: 946 947 best_model: object, 948 Returns the best model pipeline based on the sort_by metric. 949 950 """ 951 return self.best_model_ 952 953 def provide_models(self, X_train, X_test): 954 """ 955 This function returns all the model objects trained in fit function. 956 If fit is not called already, then we call fit and then return the models. 957 958 Parameters: 959 960 X_train : array-like, 961 Training vectors, where rows is the number of samples 962 and columns is the number of features. 963 964 X_test : array-like, 965 Testing vectors, where rows is the number of samples 966 and columns is the number of features. 967 968 Returns: 969 970 models: dict-object, 971 Returns a dictionary with each model pipeline as value 972 with key as name of models. 973 974 """ 975 if self.h is None: 976 if len(self.models_.keys()) == 0: 977 self.fit(X_train, X_test) 978 else: 979 if len(self.models_.keys()) == 0: 980 if isinstance(X_test, pd.DataFrame): 981 self.fit(X_train, X_test.iloc[0 : self.h, :]) 982 else: 983 self.fit(X_train, X_test[0 : self.h, :]) 984 985 return self.models_
Fitting -- almost -- all the regression algorithms with layers of nnetsauce's CustomRegressor to multivariate time series and returning their scores.
Parameters:
verbose: int, optional (default=0)
Any positive number for verbosity.
ignore_warnings: bool, optional (default=True)
When set to True, warnings related to algorithms that are not
able to run are ignored.
custom_metric: function, optional (default=None)
When function is provided, models are evaluated based on the custom
evaluation metric provided.
predictions: bool, optional (default=False)
When set to True, the predictions of all the models are returned (as a dictionary keyed by model name) alongside the scores.
sort_by: string, optional (default='RMSE')
Sort models by a metric. Available options are the score columns produced by `fit`:
'RMSE', 'MAE', 'MPL' ('WINKLERSCORE' and 'COVERAGE' are also available when prediction
intervals are computed), or 'Custom metric' when custom_metric is provided.
random_state: int, optional (default=42)
Reproducibility seed.
estimators: list, optional (default='all')
list of Estimators (regression algorithms) names or just 'all' (default='all')
preprocess: bool, preprocessing is done when set to True
n_layers: int, optional (default=1)
Number of layers in the network. When set to 1, the model is equivalent to an MTS.
h: int, optional (default=None)
Number of steps ahead to predict (when used, must be > 0 and <= X_test.shape[0]).
All the other parameters are the same as MTS's.
Attributes:
models_: dict-object
Returns a dictionary with each model pipeline as value
with key as name of models.
best_model_: object
Returns the best model pipeline based on the sort_by metric.
Examples:
See https://thierrymoudiki.github.io/blog/2023/10/29/python/quasirandomizednn/MTS-LazyPredict
def fit(self, X_train, X_test, xreg=None, per_series=False, **kwargs):
    """Fit Regression algorithms to X_train, predict and score on X_test.

    The classical baselines ("ARIMA", "ETS", "Theta", "VAR", "VECM") are
    fitted first, then every regressor in `self.regressors` wrapped in a
    `DeepMTS`. A model that raises while being fitted or scored is skipped
    (and reported when `self.ignore_warnings` is False).

    Parameters:

        X_train: array-like or data frame,
            Training vectors, where rows is the number of samples
            and columns is the number of features.

        X_test: array-like or data frame,
            Testing vectors, where rows is the number of samples
            and columns is the number of features. When `self.h` is set,
            only the first `self.h` rows are used for scoring.

        xreg: array-like, optional (default=None)
            Additional (external) regressors to be passed to self.obj
            xreg must be in 'increasing' order (most recent observations last).
            Only forwarded to `fit` when no preprocessing pipeline is used.

        per_series: bool, optional (default=False)
            When set to True, the metrics are computed series by series.

        **kwargs: dict, optional (default=None)
            Additional parameters, forwarded unchanged to the base learners'
            constructors and to the `fit` and `predict` calls of every model.

    Returns:

        scores: Pandas DataFrame
            Returns metrics of all the models in a Pandas DataFrame.

        predictions: dict
            Predictions of all the (non-baseline) models, keyed by model
            name. Only returned, as `(scores, predictions)`, when
            `self.predictions` is True.

    """
    has_interval = (self.replications is not None) or (
        self.type_pi == "gaussian"
    )
    track_custom = self.custom_metric is not None
    # Sentinel stored when the user-supplied metric fails. `np.finfo` is
    # required here: `np.iinfo` raises ValueError on a float dtype.
    worst_score = np.finfo(np.float32).max
    error_kinds = (
        "root_mean_squared_error",
        "mean_absolute_error",
        "mean_pinball_loss",
    )

    names = []
    RMSE = []
    MAE = []
    MPL = []
    WINKLERSCORE = []
    COVERAGE = []
    CUSTOM_METRIC = []
    TIME = []
    predictions = {}

    # AssertionError is kept (rather than ValueError) for backward
    # compatibility with callers that may catch it.
    if self.h is None:
        assert X_test is not None, "If h is None, X_test must be provided."

    if isinstance(X_train, np.ndarray):
        X_train = pd.DataFrame(X_train)
        X_test = pd.DataFrame(X_test)

    self.series_names = X_train.columns.tolist()

    X_train = convert_df_to_numeric(X_train)
    X_test = convert_df_to_numeric(X_test)

    numeric_features = X_train.select_dtypes(include=[np.number]).columns
    categorical_features = X_train.select_dtypes(include=["object"]).columns

    categorical_low, categorical_high = get_card_split(
        X_train, categorical_features
    )

    if self.preprocess:
        preprocessor = ColumnTransformer(
            transformers=[
                ("numeric", numeric_transformer, numeric_features),
                (
                    "categorical_low",
                    categorical_transformer_low,
                    categorical_low,
                ),
                (
                    "categorical_high",
                    categorical_transformer_high,
                    categorical_high,
                ),
            ]
        )

    def horizon():
        """Return the number of steps to forecast, validating `self.h`."""
        if self.h is None:
            return X_test.shape[0]
        assert (
            self.h > 0 and self.h <= X_test.shape[0]
        ), "h must be > 0 and <= X_test.shape[0]"
        return self.h

    def actual_for_horizon():
        """Return the rows of X_test matching the forecasting horizon."""
        if self.h is None:
            return X_test
        if isinstance(X_test, pd.DataFrame):
            return X_test.iloc[0 : self.h, :]
        return X_test[0 : self.h, :]

    def point_errors(actual, pred):
        """Return (rmse, mae, mpl) of `pred` against `actual`."""
        return tuple(
            mean_errors(
                actual=actual,
                pred=pred,
                scoring=scoring,
                per_series=per_series,
            )
            for scoring in error_kinds
        )

    def interval_scores(actual, forecast):
        """Return (winkler score, coverage) at the 95% level."""
        return (
            winkler_score(
                obj=forecast,
                actual=actual,
                level=95,
                per_series=per_series,
            ),
            coverage(forecast, actual, level=95, per_series=per_series),
        )

    def custom_score(actual, forecast):
        """Evaluate the user metric; fall back to the sentinel on failure."""
        try:
            return self.custom_metric(actual, forecast)
        except Exception:
            return worst_score

    def record(name, rmse, mae, mpl, interval, elapsed, custom):
        """Append one model's results to every score list at once."""
        names.append(name)
        RMSE.append(rmse)
        MAE.append(mae)
        MPL.append(mpl)
        if has_interval:
            WINKLERSCORE.append(interval[0])
            COVERAGE.append(interval[1])
        TIME.append(elapsed)
        if track_custom:
            CUSTOM_METRIC.append(custom)

    # baselines (Classical MTS) ----
    for name in ["ARIMA", "ETS", "Theta", "VAR", "VECM"]:
        try:
            start = time.time()
            regr = ClassicalMTS(model=name)
            regr.fit(X_train, **kwargs)
            self.models_[name] = regr
            X_pred = regr.predict(h=horizon(), **kwargs)
            actual = actual_for_horizon()
            rmse, mae, mpl = point_errors(actual, X_pred)
            # Scored inside the `try` so a baseline that cannot be scored
            # is skipped entirely and the score lists stay aligned.
            interval = (
                interval_scores(actual, X_pred) if has_interval else None
            )
        except Exception:
            continue

        elapsed = time.time() - start
        custom = custom_score(actual, X_pred) if track_custom else None
        record(name, rmse, mae, mpl, interval, elapsed, custom)

    if self.estimators == "all":
        if self.n_layers <= 1:
            self.regressors = REGRESSORSMTS
        else:
            self.regressors = DEEPREGRESSORSMTS
    else:
        prefix = "MTS(" if self.n_layers <= 1 else "DeepMTS("
        self.regressors = [
            (prefix + est[0] + ")", est[1])
            for est in all_estimators()
            if (
                issubclass(est[1], RegressorMixin)
                and (est[0] in self.estimators)
            )
        ]

    use_pipeline = self.preprocess is True

    for name, model in tqdm(self.regressors):  # do parallel exec
        start = time.time()
        try:
            if "random_state" in model().get_params():
                base_learner = model(random_state=self.random_state, **kwargs)
            else:
                base_learner = model(**kwargs)

            deep_mts = DeepMTS(
                obj=base_learner,
                n_layers=self.n_layers,
                n_hidden_features=self.n_hidden_features,
                activation_name=self.activation_name,
                a=self.a,
                nodes_sim=self.nodes_sim,
                bias=self.bias,
                dropout=self.dropout,
                direct_link=self.direct_link,
                n_clusters=self.n_clusters,
                cluster_encode=self.cluster_encode,
                type_clust=self.type_clust,
                type_scaling=self.type_scaling,
                lags=self.lags,
                type_pi=self.type_pi,
                block_size=self.block_size,
                replications=self.replications,
                kernel=self.kernel,
                agg=self.agg,
                seed=self.seed,
                backend=self.backend,
                show_progress=self.show_progress,
            )

            if use_pipeline:
                pipe = Pipeline(
                    steps=[
                        ("preprocessor", preprocessor),
                        ("regressor", deep_mts),
                    ]
                )
                pipe.fit(X_train, **kwargs)
                forecaster = pipe["regressor"]
            else:
                pipe = deep_mts
                pipe.fit(X_train, xreg, **kwargs)
                forecaster = pipe

            self.models_[name] = pipe

            X_pred = forecaster.predict(h=horizon(), **kwargs)
            actual = actual_for_horizon()

            # With prediction intervals the forecast carries the point
            # forecast in `.mean`; point errors are computed on it.
            point_forecast = X_pred.mean if has_interval else X_pred
            rmse, mae, mpl = point_errors(actual, point_forecast)
            interval = (
                interval_scores(actual, X_pred) if has_interval else None
            )

            elapsed = time.time() - start
            custom_metric = (
                custom_score(actual, X_pred) if track_custom else None
            )
            record(name, rmse, mae, mpl, interval, elapsed, custom_metric)

            if self.verbose > 0:
                scores_verbose = {
                    "Model": name,
                    "RMSE": rmse,
                    "MAE": mae,
                    "MPL": mpl,
                }
                if has_interval:
                    scores_verbose["WINKLERSCORE"] = interval[0]
                    scores_verbose["COVERAGE"] = interval[1]
                scores_verbose["Time taken"] = elapsed
                if track_custom:
                    scores_verbose["Custom metric"] = custom_metric
                print(scores_verbose)

            if self.predictions:
                predictions[name] = X_pred

        except Exception as exception:
            if self.ignore_warnings is False:
                print(name + " model failed to execute")
                print(exception)

    scores = {
        "Model": names,
        "RMSE": RMSE,
        "MAE": MAE,
        "MPL": MPL,
    }
    if has_interval:
        scores["WINKLERSCORE"] = WINKLERSCORE
        scores["COVERAGE"] = COVERAGE
    scores["Time Taken"] = TIME
    if track_custom:
        scores["Custom metric"] = CUSTOM_METRIC

    if per_series:
        scores = dict_to_dataframe_series(scores, self.series_names)
    else:
        scores = pd.DataFrame(scores)

    # Deliberate best effort: per-series scores can't be sorted, and an
    # unknown `sort_by` or an empty result must not discard the scores.
    try:
        scores = scores.sort_values(by=self.sort_by, ascending=True).set_index(
            "Model"
        )
        self.best_model_ = self.models_[scores.index[0]]
    except Exception:
        pass

    if self.predictions is True:
        return scores, predictions

    return scores
Fit Regression algorithms to X_train, predict and score on X_test.
Parameters:
X_train: array-like or data frame,
Training vectors, where rows is the number of samples
and columns is the number of features.
X_test: array-like or data frame,
Testing vectors, where rows is the number of samples
and columns is the number of features.
xreg: array-like, optional (default=None)
Additional (external) regressors to be passed to self.obj
xreg must be in 'increasing' order (most recent observations last)
per_series: bool, optional (default=False)
When set to True, the metrics are computed series by series.
**kwargs: dict, optional (default=None)
Additional parameters to be passed to `fit` method of `obj`.
Returns:
scores: Pandas DataFrame
Returns metrics of all the models in a Pandas DataFrame.
predictions: dict
Returns the predictions of all the models, keyed by model name (only returned, together with scores, when predictions is set to True).
def provide_models(self, X_train, X_test):
    """
    This function returns all the model objects trained in fit function.
    If fit is not called already, then we call fit and then return the models.

    Parameters:

        X_train : array-like,
            Training vectors, where rows is the number of samples
            and columns is the number of features.

        X_test : array-like,
            Testing vectors, where rows is the number of samples
            and columns is the number of features. When `self.h` is set,
            only its first `self.h` rows are handed to `fit`.

    Returns:

        models: dict-object,
            Returns a dictionary with each model pipeline as value
            with key as name of models.

    """
    # Models already available: nothing to fit.
    if self.models_:
        return self.models_

    if self.h is None:
        self.fit(X_train, X_test)
    elif isinstance(X_test, pd.DataFrame):
        # Data frames need positional slicing through `.iloc`.
        self.fit(X_train, X_test.iloc[0 : self.h, :])
    else:
        self.fit(X_train, X_test[0 : self.h, :])

    return self.models_
This function returns all the model objects trained in fit function. If fit is not called already, then we call fit and then return the models.
Parameters:
X_train : array-like,
Training vectors, where rows is the number of samples
and columns is the number of features.
X_test : array-like,
Testing vectors, where rows is the number of samples
and columns is the number of features.
Returns:
models: dict-object,
Returns a dictionary with each model pipeline as value
with key as name of models.
class MedianVotingRegressor(VotingRegressor):
    """Voting regressor whose prediction is the median of its base regressors.

    Only `predict` is overridden; everything else is inherited from
    `VotingRegressor`.
    """

    def predict(self, X):
        """
        Predict using the median of the base regressors' predictions.

        Parameters:
            X (array-like): Feature matrix for predictions.

        Returns:
            y_pred (array): Median of predictions from the base regressors.
        """
        # One entry per regressor in `self.estimators_`.
        per_estimator = []
        for fitted in self.estimators_:
            per_estimator.append(fitted.predict(X))

        # Axis 0 runs over the regressors.
        stacked = np.asarray(per_estimator)
        return np.median(stacked, axis=0)
Prediction voting regressor for unfitted estimators.
A voting regressor is an ensemble meta-estimator that fits several base regressors, each on the whole dataset. Then it averages the individual predictions to form a final prediction.
For a detailed example, refer to
:ref:sphx_glr_auto_examples_ensemble_plot_voting_regressor.py.
Read more in the :ref:User Guide <voting_regressor>.
New in version 0.21.
Parameters
estimators : list of (str, estimator) tuples
Invoking the fit method on the VotingRegressor will fit clones
of those original estimators that will be stored in the class attribute
self.estimators_. An estimator can be set to 'drop' using
set_params().
*Changed in version 0.21:*
``'drop'`` is accepted. Using None was deprecated in 0.22 and
support was removed in 0.24.
weights : array-like of shape (n_regressors,), default=None
Sequence of weights (float or int) to weight the occurrences of
predicted values before averaging. Uses uniform weights if None.
n_jobs : int, default=None
The number of jobs to run in parallel for fit.
None means 1 unless in a joblib.parallel_backend context.
-1 means using all processors. See :term:Glossary <n_jobs>
for more details.
verbose : bool, default=False If True, the time elapsed while fitting will be printed as it is completed.
*New in version 0.23.*
Attributes
estimators_ : list of regressors
The collection of fitted sub-estimators as defined in estimators
that are not 'drop'.
named_estimators_ : ~sklearn.utils.Bunch
Attribute to access any fitted sub-estimators by name.
*New in version 0.20.*
n_features_in_ : int
Number of features seen during :term:fit. Only defined if the
underlying regressor exposes such an attribute when fit.
*New in version 0.24.*
feature_names_in_ : ndarray of shape (n_features_in_,)
Names of features seen during :term:fit. Only defined if the
underlying estimators expose such an attribute when fit.
*New in version 1.0.*
See Also
VotingClassifier : Soft Voting/Majority Rule classifier.
Examples
>>> import numpy as np
>>> from sklearn.linear_model import LinearRegression
>>> from sklearn.ensemble import RandomForestRegressor
>>> from sklearn.ensemble import VotingRegressor
>>> from sklearn.neighbors import KNeighborsRegressor
>>> r1 = LinearRegression()
>>> r2 = RandomForestRegressor(n_estimators=10, random_state=1)
>>> r3 = KNeighborsRegressor()
>>> X = np.array([[1, 1], [2, 4], [3, 9], [4, 16], [5, 25], [6, 36]])
>>> y = np.array([2, 6, 12, 20, 30, 42])
>>> er = VotingRegressor([('lr', r1), ('rf', r2), ('r3', r3)])
>>> print(er.fit(X, y).predict(X))
[ 6.8... 8.4... 12.5... 17.8... 26... 34...]
In the following example, we drop the 'lr' estimator with
~VotingRegressor.set_params() and fit the remaining two estimators:
>>> er = er.set_params(lr='drop')
>>> er = er.fit(X, y)
>>> len(er.estimators_)
2
def predict(self, X):
    """
    Predict using the median of the base regressors' predictions.

    Parameters:
        X (array-like): Feature matrix for predictions.

    Returns:
        y_pred (array): Median of predictions from the base regressors.
    """
    # Collect one prediction per regressor in `self.estimators_`.
    individual = list(map(lambda est: est.predict(X), self.estimators_))
    # Taking the median over axis 0 aggregates across regressors.
    return np.median(np.asarray(individual), axis=0)
Predict using the median of the base regressors' predictions.
Parameters: X (array-like): Feature matrix for predictions.
Returns: y_pred (array): Median of predictions from the base regressors.
28class MTS(Base): 29 """Univariate and multivariate time series (MTS) forecasting with Quasi-Randomized networks 30 31 Parameters: 32 33 obj: object. 34 any object containing a method fit (obj.fit()) and a method predict 35 (obj.predict()). 36 37 n_hidden_features: int. 38 number of nodes in the hidden layer. 39 40 activation_name: str. 41 activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'. 42 43 a: float. 44 hyperparameter for 'prelu' or 'elu' activation function. 45 46 nodes_sim: str. 47 type of simulation for the nodes: 'sobol', 'hammersley', 'halton', 48 'uniform'. 49 50 bias: boolean. 51 indicates if the hidden layer contains a bias term (True) or not 52 (False). 53 54 dropout: float. 55 regularization parameter; (random) percentage of nodes dropped out 56 of the training. 57 58 direct_link: boolean. 59 indicates if the original predictors are included (True) in model's fitting or not (False). 60 61 n_clusters: int. 62 number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering). 63 64 cluster_encode: bool. 65 defines how the variable containing clusters is treated (default is one-hot) 66 if `False`, then labels are used, without one-hot encoding. 67 68 type_clust: str. 69 type of clustering method: currently k-means ('kmeans') or Gaussian 70 Mixture Model ('gmm'). 71 72 type_scaling: a tuple of 3 strings. 73 scaling methods for inputs, hidden layer, and clustering respectively 74 (and when relevant). 75 Currently available: standardization ('std') or MinMax scaling ('minmax'). 76 77 lags: int. 78 number of lags used for each time series. 79 If string, lags must be one of 'AIC', 'AICc', or 'BIC'. 80 81 type_pi: str. 
82 type of prediction interval; currently: 83 - "gaussian": simple, fast, but: assumes stationarity of Gaussian in-sample residuals and independence in the multivariate case 84 - "kde": based on Kernel Density Estimation of in-sample residuals 85 - "bootstrap": based on independent bootstrap of in-sample residuals 86 - "block-bootstrap": based on basic block bootstrap of in-sample residuals 87 - "scp-kde": Sequential split conformal prediction with Kernel Density Estimation of calibrated residuals 88 - "scp-bootstrap": Sequential split conformal prediction with independent bootstrap of calibrated residuals 89 - "scp-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of calibrated residuals 90 - "scp2-kde": Sequential split conformal prediction with Kernel Density Estimation of standardized calibrated residuals 91 - "scp2-bootstrap": Sequential split conformal prediction with independent bootstrap of standardized calibrated residuals 92 - "scp2-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of standardized calibrated residuals 93 - based on copulas of in-sample residuals: 'vine-tll', 'vine-bb1', 'vine-bb6', 'vine-bb7', 'vine-bb8', 'vine-clayton', 94 'vine-frank', 'vine-gaussian', 'vine-gumbel', 'vine-indep', 'vine-joe', 'vine-student' 95 - 'scp-vine-tll', 'scp-vine-bb1', 'scp-vine-bb6', 'scp-vine-bb7', 'scp-vine-bb8', 'scp-vine-clayton', 96 'scp-vine-frank', 'scp-vine-gaussian', 'scp-vine-gumbel', 'scp-vine-indep', 'scp-vine-joe', 'scp-vine-student' 97 - 'scp2-vine-tll', 'scp2-vine-bb1', 'scp2-vine-bb6', 'scp2-vine-bb7', 'scp2-vine-bb8', 'scp2-vine-clayton', 98 'scp2-vine-frank', 'scp2-vine-gaussian', 'scp2-vine-gumbel', 'scp2-vine-indep', 'scp2-vine-joe', 'scp2-vine-student' 99 100 block_size: int. 101 size of block for 'type_pi' in ("block-bootstrap", "scp-block-bootstrap", "scp2-block-bootstrap"). 102 Default is round(3.15*(n_residuals^1/3)) 103 104 replications: int. 
105 number of replications (if needed, for predictive simulation). Default is 'None'. 106 107 kernel: str. 108 the kernel to use for residuals density estimation (used for predictive simulation). Currently, either 'gaussian' or 'tophat'. 109 110 agg: str. 111 either "mean" or "median" for simulation of bootstrap aggregating 112 113 seed: int. 114 reproducibility seed for nodes_sim=='uniform' or predictive simulation. 115 116 backend: str. 117 "cpu" or "gpu" or "tpu". 118 119 verbose: int. 120 0: not printing; 1: printing 121 122 show_progress: bool. 123 True: progress bar when fitting each series; False: no progress bar when fitting each series 124 125 Attributes: 126 127 fit_objs_: dict 128 objects adjusted to each individual time series 129 130 y_: {array-like} 131 MTS responses (most recent observations first) 132 133 X_: {array-like} 134 MTS lags 135 136 xreg_: {array-like} 137 external regressors 138 139 y_means_: dict 140 a dictionary of each series mean values 141 142 preds_: {array-like} 143 successive model predictions 144 145 preds_std_: {array-like} 146 standard deviation around the predictions for Bayesian base learners (`obj`) 147 148 gaussian_preds_std_: {array-like} 149 standard deviation around the predictions for `type_pi='gaussian'` 150 151 return_std_: boolean 152 return uncertainty or not (set in predict) 153 154 df_: data frame 155 the input data frame, in case a data.frame is provided to `fit` 156 157 n_obs_: int 158 number of time series observations (number of rows for multivariate) 159 160 level_: int 161 level of confidence for prediction intervals (default is 95) 162 163 residuals_: {array-like} 164 in-sample residuals (for `type_pi` not conformal prediction) or calibrated residuals 165 (for `type_pi` in conformal prediction) 166 167 residuals_sims_: tuple of {array-like} 168 simulations of in-sample residuals (for `type_pi` not conformal prediction) or 169 calibrated residuals (for `type_pi` in conformal prediction) 170 171 kde_: A 
scikit-learn object, see https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KernelDensity.html 172 173 residuals_std_dev_: residuals standard deviation 174 175 Examples: 176 177 Example 1: 178 179 ```python 180 import nnetsauce as ns 181 import numpy as np 182 from sklearn import linear_model 183 np.random.seed(123) 184 185 M = np.random.rand(10, 3) 186 M[:,0] = 10*M[:,0] 187 M[:,2] = 25*M[:,2] 188 print(M) 189 190 # Adjust Bayesian Ridge 191 regr4 = linear_model.BayesianRidge() 192 obj_MTS = ns.MTS(regr4, lags = 1, n_hidden_features=5) 193 obj_MTS.fit(M) 194 print(obj_MTS.predict()) 195 196 # with credible intervals 197 print(obj_MTS.predict(return_std=True, level=80)) 198 199 print(obj_MTS.predict(return_std=True, level=95)) 200 ``` 201 202 Example 2: 203 204 ```python 205 import nnetsauce as ns 206 import numpy as np 207 from sklearn import linear_model 208 209 dataset = { 210 'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'], 211 'series1' : [34, 30, 35.6, 33.3, 38.1], 212 'series2' : [4, 5.5, 5.6, 6.3, 5.1], 213 'series3' : [100, 100.5, 100.6, 100.2, 100.1]} 214 df = pd.DataFrame(dataset).set_index('date') 215 print(df) 216 217 # Adjust Bayesian Ridge 218 regr5 = linear_model.BayesianRidge() 219 obj_MTS = ns.MTS(regr5, lags = 1, n_hidden_features=5) 220 obj_MTS.fit(df) 221 print(obj_MTS.predict()) 222 223 # with credible intervals 224 print(obj_MTS.predict(return_std=True, level=80)) 225 226 print(obj_MTS.predict(return_std=True, level=95)) 227 ``` 228 """ 229 230 # construct the object ----- 231 232 def __init__( 233 self, 234 obj, 235 n_hidden_features=5, 236 activation_name="relu", 237 a=0.01, 238 nodes_sim="sobol", 239 bias=True, 240 dropout=0, 241 direct_link=True, 242 n_clusters=2, 243 cluster_encode=True, 244 type_clust="kmeans", 245 type_scaling=("std", "std", "std"), 246 lags=1, 247 type_pi="kde", 248 block_size=None, 249 replications=None, 250 kernel="gaussian", 251 agg="mean", 252 seed=123, 253 
backend="cpu", 254 verbose=0, 255 show_progress=True, 256 ): 257 258 super().__init__( 259 n_hidden_features=n_hidden_features, 260 activation_name=activation_name, 261 a=a, 262 nodes_sim=nodes_sim, 263 bias=bias, 264 dropout=dropout, 265 direct_link=direct_link, 266 n_clusters=n_clusters, 267 cluster_encode=cluster_encode, 268 type_clust=type_clust, 269 type_scaling=type_scaling, 270 seed=seed, 271 backend=backend, 272 ) 273 274 # Add validation for lags parameter 275 if isinstance(lags, str): 276 assert lags in ( 277 "AIC", 278 "AICc", 279 "BIC", 280 ), "if string, lags must be one of 'AIC', 'AICc', or 'BIC'" 281 else: 282 assert int(lags) == lags, "if numeric, lags parameter should be an integer" 283 284 self.obj = obj 285 self.n_series = None 286 self.lags = lags 287 self.type_pi = type_pi 288 self.block_size = block_size 289 self.replications = replications 290 self.kernel = kernel 291 self.agg = agg 292 self.verbose = verbose 293 self.show_progress = show_progress 294 self.series_names = None 295 self.input_dates = None 296 self.fit_objs_ = {} 297 self.y_ = None # MTS responses (most recent observations first) 298 self.X_ = None # MTS lags 299 self.xreg_ = None 300 self.y_means_ = {} 301 self.mean_ = None 302 self.median_ = None 303 self.upper_ = None 304 self.lower_ = None 305 self.output_dates_ = None 306 self.preds_std_ = [] 307 self.gaussian_preds_std_ = None 308 self.alpha_ = None 309 self.return_std_ = None 310 self.df_ = None 311 self.residuals_ = [] 312 self.abs_calib_residuals_ = None 313 self.calib_residuals_quantile_ = None 314 self.residuals_sims_ = None 315 self.kde_ = None 316 self.sims_ = None 317 self.residuals_std_dev_ = None 318 self.n_obs_ = None 319 self.level_ = None 320 self.init_n_series_ = None 321 322 def fit(self, X, xreg=None, **kwargs): 323 """Fit MTS model to training data X, with optional regressors xreg 324 325 Parameters: 326 327 X: {array-like}, shape = [n_samples, n_features] 328 Training time series, where n_samples is the 
number 329 of samples and n_features is the number of features; 330 X must be in increasing order (most recent observations last) 331 332 xreg: {array-like}, shape = [n_samples, n_features_xreg] 333 Additional (external) regressors to be passed to self.obj 334 xreg must be in 'increasing' order (most recent observations last) 335 336 **kwargs: for now, additional parameters to be passed to for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity) 337 338 Returns: 339 340 self: object 341 """ 342 343 self.init_n_series_ = X.shape[1] 344 345 # Automatic lag selection if requested 346 if isinstance(self.lags, str): 347 max_lags = min(25, X.shape[0] // 4) 348 best_ic = float("inf") 349 best_lags = 1 350 351 if self.verbose: 352 print(f"\nSelecting optimal number of lags using {self.lags}...") 353 iterator = tqdm(range(1, max_lags + 1)) 354 else: 355 iterator = range(1, max_lags + 1) 356 357 for lag in iterator: 358 # Convert DataFrame to numpy array before reversing 359 if isinstance(X, pd.DataFrame): 360 X_values = X.values[::-1] 361 else: 362 X_values = X[::-1] 363 364 # Try current lag value 365 if self.init_n_series_ > 1: 366 mts_input = ts.create_train_inputs(X_values, lag) 367 else: 368 mts_input = ts.create_train_inputs(X_values.reshape(-1, 1), lag) 369 370 # Cook training set and fit model 371 dummy_y, scaled_Z = self.cook_training_set( 372 y=np.ones(mts_input[0].shape[0]), X=mts_input[1] 373 ) 374 residuals_ = [] 375 376 for i in range(self.init_n_series_): 377 y_mean = np.mean(mts_input[0][:, i]) 378 centered_y_i = mts_input[0][:, i] - y_mean 379 self.obj.fit(X=scaled_Z, y=centered_y_i) 380 residuals_.append( 381 (centered_y_i - self.obj.predict(scaled_Z)).tolist() 382 ) 383 384 self.residuals_ = np.asarray(residuals_).T 385 ic = self._compute_information_criterion( 386 curr_lags=lag, criterion=self.lags 387 ) 388 389 if self.verbose: 390 print(f"Trying lags={lag}, {self.lags}={ic:.2f}") 391 392 if ic < best_ic: 393 best_ic = ic 394 
best_lags = lag 395 396 if self.verbose: 397 print(f"\nSelected {best_lags} lags with {self.lags}={best_ic:.2f}") 398 399 self.lags = best_lags 400 401 self.input_dates = None 402 self.df_ = None 403 404 if isinstance(X, pd.DataFrame) is False: 405 # input data set is a numpy array 406 if xreg is None: 407 X = pd.DataFrame(X) 408 self.series_names = ["series" + str(i) for i in range(X.shape[1])] 409 else: 410 # xreg is not None 411 X = mo.cbind(X, xreg) 412 self.xreg_ = xreg 413 414 else: # input data set is a DataFrame with column names 415 416 X_index = None 417 if X.index is not None: 418 X_index = X.index 419 if xreg is None: 420 X = copy.deepcopy(mo.convert_df_to_numeric(X)) 421 else: 422 X = copy.deepcopy(mo.cbind(mo.convert_df_to_numeric(X), xreg)) 423 self.xreg_ = xreg 424 if X_index is not None: 425 X.index = X_index 426 self.series_names = X.columns.tolist() 427 428 if isinstance(X, pd.DataFrame): 429 if self.df_ is None: 430 self.df_ = X 431 X = X.values 432 else: 433 input_dates_prev = pd.DatetimeIndex(self.df_.index.values) 434 frequency = pd.infer_freq(input_dates_prev) 435 self.df_ = pd.concat([self.df_, X], axis=0) 436 self.input_dates = pd.date_range( 437 start=input_dates_prev[0], 438 periods=len(input_dates_prev) + X.shape[0], 439 freq=frequency, 440 ).values.tolist() 441 self.df_.index = self.input_dates 442 X = self.df_.values 443 self.df_.columns = self.series_names 444 else: 445 if self.df_ is None: 446 self.df_ = pd.DataFrame(X, columns=self.series_names) 447 else: 448 self.df_ = pd.concat( 449 [self.df_, pd.DataFrame(X, columns=self.series_names)], 450 axis=0, 451 ) 452 453 self.input_dates = ts.compute_input_dates(self.df_) 454 455 try: 456 # multivariate time series 457 n, p = X.shape 458 except: 459 # univariate time series 460 n = X.shape[0] 461 p = 1 462 self.n_obs_ = n 463 464 rep_1_n = np.repeat(1, n) 465 466 self.y_ = None 467 self.X_ = None 468 self.n_series = p 469 self.fit_objs_.clear() 470 self.y_means_.clear() 471 residuals_ = 
[] 472 self.residuals_ = None 473 self.residuals_sims_ = None 474 self.kde_ = None 475 self.sims_ = None 476 self.scaled_Z_ = None 477 self.centered_y_is_ = [] 478 479 if self.init_n_series_ > 1: 480 # multivariate time series 481 mts_input = ts.create_train_inputs(X[::-1], self.lags) 482 else: 483 # univariate time series 484 mts_input = ts.create_train_inputs(X.reshape(-1, 1)[::-1], self.lags) 485 486 self.y_ = mts_input[0] 487 488 self.X_ = mts_input[1] 489 490 dummy_y, scaled_Z = self.cook_training_set(y=rep_1_n, X=self.X_) 491 492 self.scaled_Z_ = scaled_Z 493 494 # loop on all the time series and adjust self.obj.fit 495 if self.verbose > 0: 496 print( 497 f"\n Adjusting {type(self.obj).__name__} to multivariate time series... \n" 498 ) 499 500 if self.show_progress is True: 501 iterator = tqdm(range(self.init_n_series_)) 502 else: 503 iterator = range(self.init_n_series_) 504 505 if self.type_pi in ( 506 "gaussian", 507 "kde", 508 "bootstrap", 509 "block-bootstrap", 510 ) or self.type_pi.startswith("vine"): 511 for i in iterator: 512 y_mean = np.mean(self.y_[:, i]) 513 self.y_means_[i] = y_mean 514 centered_y_i = self.y_[:, i] - y_mean 515 self.centered_y_is_.append(centered_y_i) 516 self.obj.fit(X=scaled_Z, y=centered_y_i) 517 self.fit_objs_[i] = deepcopy(self.obj) 518 residuals_.append( 519 (centered_y_i - self.fit_objs_[i].predict(scaled_Z)).tolist() 520 ) 521 522 if self.type_pi.startswith("scp"): 523 # split conformal prediction 524 for i in iterator: 525 n_y = self.y_.shape[0] 526 n_y_half = n_y // 2 527 first_half_idx = range(0, n_y_half) 528 second_half_idx = range(n_y_half, n_y) 529 y_mean_temp = np.mean(self.y_[first_half_idx, i]) 530 centered_y_i_temp = self.y_[first_half_idx, i] - y_mean_temp 531 self.obj.fit(X=scaled_Z[first_half_idx, :], y=centered_y_i_temp) 532 # calibrated residuals actually 533 residuals_.append( 534 ( 535 self.y_[second_half_idx, i] 536 - (y_mean_temp + self.obj.predict(scaled_Z[second_half_idx, :])) 537 ).tolist() 538 ) 539 
# fit on the second half 540 y_mean = np.mean(self.y_[second_half_idx, i]) 541 self.y_means_[i] = y_mean 542 centered_y_i = self.y_[second_half_idx, i] - y_mean 543 self.obj.fit(X=scaled_Z[second_half_idx, :], y=centered_y_i) 544 self.fit_objs_[i] = deepcopy(self.obj) 545 546 self.residuals_ = np.asarray(residuals_).T 547 548 if self.type_pi == "gaussian": 549 self.gaussian_preds_std_ = np.std(self.residuals_, axis=0) 550 551 if self.type_pi.startswith("scp2"): 552 # Calculate mean and standard deviation for each column 553 data_mean = np.mean(self.residuals_, axis=0) 554 self.residuals_std_dev_ = np.std(self.residuals_, axis=0) 555 # Center and scale the array using broadcasting 556 self.residuals_ = ( 557 self.residuals_ - data_mean[np.newaxis, :] 558 ) / self.residuals_std_dev_[np.newaxis, :] 559 560 if self.replications != None and "kde" in self.type_pi: 561 if self.verbose > 0: 562 print(f"\n Simulate residuals using {self.kernel} kernel... \n") 563 assert self.kernel in ( 564 "gaussian", 565 "tophat", 566 ), "currently, 'kernel' must be either 'gaussian' or 'tophat'" 567 kernel_bandwidths = {"bandwidth": np.logspace(-6, 6, 150)} 568 grid = GridSearchCV( 569 KernelDensity(kernel=self.kernel, **kwargs), 570 param_grid=kernel_bandwidths, 571 ) 572 grid.fit(self.residuals_) 573 574 if self.verbose > 0: 575 print( 576 f"\n Best parameters for {self.kernel} kernel: {grid.best_params_} \n" 577 ) 578 579 self.kde_ = grid.best_estimator_ 580 581 return self 582 583 def partial_fit(self, X, xreg=None, **kwargs): 584 """Update the model with new observations X, with optional regressors xreg 585 586 Parameters: 587 588 X: {array-like}, shape = [n_samples, n_features] 589 Training time series, where n_samples is the number 590 of samples and n_features is the number of features; 591 X must be in increasing order (most recent observations last) 592 593 xreg: {array-like}, shape = [n_samples, n_features_xreg] 594 Additional (external) regressors to be passed to self.obj 595 
xreg must be in 'increasing' order (most recent observations last) 596 597 **kwargs: for now, additional parameters to be passed to for kernel density estimation, when needed (see sklearn.neighbors.KernelDensity) 598 599 Returns: 600 601 self: object 602 """ 603 604 assert self.df_ is not None, "fit() must be called before partial_fit()" 605 606 if (isinstance(X, pd.DataFrame) is False) and isinstance(X, pd.Series) is False: 607 if len(X.shape) == 1: 608 X = X.reshape(1, -1) 609 610 return self.fit(X, xreg, **kwargs) 611 612 else: 613 if len(X.shape) == 1: 614 X = pd.DataFrame(X.values.reshape(1, -1), columns=self.df_.columns) 615 616 return self.fit(X, xreg, **kwargs) 617 618 def predict(self, h=5, level=95, **kwargs): 619 """Forecast all the time series, h steps ahead""" 620 621 self.output_dates_, frequency = ts.compute_output_dates(self.df_, h) 622 623 self.level_ = level 624 625 self.return_std_ = False # do not remove (/!\) 626 627 self.mean_ = None # do not remove (/!\) 628 629 self.mean_ = deepcopy(self.y_) # do not remove (/!\) 630 631 self.lower_ = None # do not remove (/!\) 632 633 self.upper_ = None # do not remove (/!\) 634 635 self.sims_ = None # do not remove (/!\) 636 637 y_means_ = np.asarray([self.y_means_[i] for i in range(self.init_n_series_)]) 638 639 n_features = self.init_n_series_ * self.lags 640 641 self.alpha_ = 100 - level 642 643 pi_multiplier = norm.ppf(1 - self.alpha_ / 200) 644 645 if "return_std" in kwargs: # bayesian forecasting 646 self.return_std_ = True 647 self.preds_std_ = [] 648 DescribeResult = namedtuple( 649 "DescribeResult", ("mean", "lower", "upper") 650 ) # to be updated 651 652 if "return_pi" in kwargs: # split conformal, without simulation 653 mean_pi_ = [] 654 lower_pi_ = [] 655 upper_pi_ = [] 656 median_pi_ = [] 657 DescribeResult = namedtuple( 658 "DescribeResult", ("mean", "lower", "upper") 659 ) # to be updated 660 661 if self.kde_ != None and "kde" in self.type_pi: # kde 662 target_cols = self.df_.columns[ 663 : 
self.init_n_series_ 664 ] # Get target column names 665 if self.verbose == 1: 666 self.residuals_sims_ = tuple( 667 self.kde_.sample( 668 n_samples=h, random_state=self.seed + 100 * i 669 ) # Keep full sample 670 for i in tqdm(range(self.replications)) 671 ) 672 elif self.verbose == 0: 673 self.residuals_sims_ = tuple( 674 self.kde_.sample( 675 n_samples=h, random_state=self.seed + 100 * i 676 ) # Keep full sample 677 for i in range(self.replications) 678 ) 679 680 # Convert to DataFrames after sampling 681 self.residuals_sims_ = tuple( 682 pd.DataFrame( 683 sim, # Keep all columns 684 columns=target_cols, # Use original target column names 685 index=self.output_dates_, 686 ) 687 for sim in self.residuals_sims_ 688 ) 689 690 if self.type_pi in ("bootstrap", "scp-bootstrap", "scp2-bootstrap"): 691 assert self.replications is not None and isinstance( 692 self.replications, int 693 ), "'replications' must be provided and be an integer" 694 if self.verbose == 1: 695 self.residuals_sims_ = tuple( 696 ts.bootstrap( 697 self.residuals_, 698 h=h, 699 block_size=None, 700 seed=self.seed + 100 * i, 701 ) 702 for i in tqdm(range(self.replications)) 703 ) 704 elif self.verbose == 0: 705 self.residuals_sims_ = tuple( 706 ts.bootstrap( 707 self.residuals_, 708 h=h, 709 block_size=None, 710 seed=self.seed + 100 * i, 711 ) 712 for i in range(self.replications) 713 ) 714 715 if self.type_pi in ( 716 "block-bootstrap", 717 "scp-block-bootstrap", 718 "scp2-block-bootstrap", 719 ): 720 if self.block_size is None: 721 self.block_size = int( 722 np.ceil(3.15 * (self.residuals_.shape[0] ** (1 / 3))) 723 ) 724 725 assert self.replications is not None and isinstance( 726 self.replications, int 727 ), "'replications' must be provided and be an integer" 728 if self.verbose == 1: 729 self.residuals_sims_ = tuple( 730 ts.bootstrap( 731 self.residuals_, 732 h=h, 733 block_size=self.block_size, 734 seed=self.seed + 100 * i, 735 ) 736 for i in tqdm(range(self.replications)) 737 ) 738 elif 
self.verbose == 0: 739 self.residuals_sims_ = tuple( 740 ts.bootstrap( 741 self.residuals_, 742 h=h, 743 block_size=self.block_size, 744 seed=self.seed + 100 * i, 745 ) 746 for i in range(self.replications) 747 ) 748 749 if "vine" in self.type_pi: 750 if self.verbose == 1: 751 self.residuals_sims_ = tuple( 752 vinecopula_sample( 753 x=self.residuals_, 754 n_samples=h, 755 method=self.type_pi, 756 random_state=self.seed + 100 * i, 757 ) 758 for i in tqdm(range(self.replications)) 759 ) 760 elif self.verbose == 0: 761 self.residuals_sims_ = tuple( 762 vinecopula_sample( 763 x=self.residuals_, 764 n_samples=h, 765 method=self.type_pi, 766 random_state=self.seed + 100 * i, 767 ) 768 for i in range(self.replications) 769 ) 770 771 mean_ = deepcopy(self.mean_) 772 773 for i in range(h): 774 775 new_obs = ts.reformat_response(mean_, self.lags) 776 new_X = new_obs.reshape(1, -1) 777 cooked_new_X = self.cook_test_set(new_X, **kwargs) 778 779 if "return_std" in kwargs: 780 self.preds_std_.append( 781 [ 782 np.asarray( 783 self.fit_objs_[i].predict(cooked_new_X, return_std=True)[1] 784 ).item() 785 for i in range(self.n_series) 786 ] 787 ) 788 789 if "return_pi" in kwargs: 790 for i in range(self.n_series): 791 preds_pi = self.fit_objs_[i].predict(cooked_new_X, **kwargs) 792 mean_pi_.append(preds_pi.mean[0]) 793 lower_pi_.append(preds_pi.lower[0]) 794 upper_pi_.append(preds_pi.upper[0]) 795 796 predicted_cooked_new_X = np.asarray( 797 [ 798 np.asarray(self.fit_objs_[i].predict(cooked_new_X)).item() 799 for i in range(self.init_n_series_) 800 ] 801 ) 802 803 preds = np.asarray(y_means_ + predicted_cooked_new_X) 804 805 # Create full row with both predictions and external regressors 806 if self.xreg_ is not None and "xreg" in kwargs: 807 next_xreg = kwargs["xreg"].iloc[i : i + 1].values.flatten() 808 full_row = np.concatenate([preds, next_xreg]) 809 else: 810 full_row = preds 811 812 # Create a new row with same number of columns as mean_ 813 new_row = np.zeros((1, 
mean_.shape[1])) 814 new_row[0, : full_row.shape[0]] = full_row 815 816 # Maintain the full dimensionality by using vstack instead of rbind 817 mean_ = np.vstack([new_row, mean_[:-1]]) 818 819 # Final output should only include the target columns 820 self.mean_ = pd.DataFrame( 821 mean_[0:h, : self.init_n_series_][::-1], 822 columns=self.df_.columns[: self.init_n_series_], 823 index=self.output_dates_, 824 ) 825 826 # function's return ---------------------------------------------------------------------- 827 if ( 828 (("return_std" not in kwargs) and ("return_pi" not in kwargs)) 829 and (self.type_pi not in ("gaussian", "scp")) 830 ) or ("vine" in self.type_pi): 831 832 if self.replications is None: 833 return self.mean_.iloc[:, : self.init_n_series_] 834 835 # if "return_std" not in kwargs and self.replications is not None 836 meanf = [] 837 medianf = [] 838 lower = [] 839 upper = [] 840 841 if "scp2" in self.type_pi: 842 843 if self.verbose == 1: 844 self.sims_ = tuple( 845 ( 846 self.mean_ 847 + self.residuals_sims_[i] 848 * self.residuals_std_dev_[np.newaxis, :] 849 for i in tqdm(range(self.replications)) 850 ) 851 ) 852 elif self.verbose == 0: 853 self.sims_ = tuple( 854 ( 855 self.mean_ 856 + self.residuals_sims_[i] 857 * self.residuals_std_dev_[np.newaxis, :] 858 for i in range(self.replications) 859 ) 860 ) 861 else: 862 863 if self.verbose == 1: 864 self.sims_ = tuple( 865 ( 866 self.mean_ + self.residuals_sims_[i] 867 for i in tqdm(range(self.replications)) 868 ) 869 ) 870 elif self.verbose == 0: 871 self.sims_ = tuple( 872 ( 873 self.mean_ + self.residuals_sims_[i] 874 for i in range(self.replications) 875 ) 876 ) 877 878 DescribeResult = namedtuple( 879 "DescribeResult", ("mean", "sims", "lower", "upper") 880 ) 881 for ix in range(self.init_n_series_): 882 sims_ix = getsims(self.sims_, ix) 883 if self.agg == "mean": 884 meanf.append(np.mean(sims_ix, axis=1)) 885 else: 886 medianf.append(np.median(sims_ix, axis=1)) 887 lower.append(np.quantile(sims_ix, 
q=self.alpha_ / 200, axis=1)) 888 upper.append(np.quantile(sims_ix, q=1 - self.alpha_ / 200, axis=1)) 889 self.mean_ = pd.DataFrame( 890 np.asarray(meanf).T, 891 columns=self.series_names[: self.init_n_series_], # self.df_.columns, 892 index=self.output_dates_, 893 ) 894 895 self.lower_ = pd.DataFrame( 896 np.asarray(lower).T, 897 columns=self.series_names[: self.init_n_series_], # self.df_.columns, 898 index=self.output_dates_, 899 ) 900 901 self.upper_ = pd.DataFrame( 902 np.asarray(upper).T, 903 columns=self.series_names[: self.init_n_series_], # self.df_.columns, 904 index=self.output_dates_, 905 ) 906 907 try: 908 self.median_ = pd.DataFrame( 909 np.asarray(medianf).T, 910 columns=self.series_names[ 911 : self.init_n_series_ 912 ], # self.df_.columns, 913 index=self.output_dates_, 914 ) 915 except Exception as e: 916 pass 917 918 return DescribeResult(self.mean_, self.sims_, self.lower_, self.upper_) 919 920 if ( 921 (("return_std" in kwargs) or ("return_pi" in kwargs)) 922 and (self.type_pi not in ("gaussian", "scp")) 923 ) or "vine" in self.type_pi: 924 DescribeResult = namedtuple("DescribeResult", ("mean", "lower", "upper")) 925 926 self.mean_ = pd.DataFrame( 927 np.asarray(self.mean_), 928 columns=self.series_names, # self.df_.columns, 929 index=self.output_dates_, 930 ) 931 932 if "return_std" in kwargs: 933 934 self.preds_std_ = np.asarray(self.preds_std_) 935 print("self.preds_std_", self.preds_std_) 936 print("self.mean_", self.mean_) 937 print("pi_multiplier", pi_multiplier) 938 939 self.lower_ = pd.DataFrame( 940 self.mean_.values - pi_multiplier * self.preds_std_, 941 columns=self.series_names, # self.df_.columns, 942 index=self.output_dates_, 943 ) 944 945 self.upper_ = pd.DataFrame( 946 self.mean_.values + pi_multiplier * self.preds_std_, 947 columns=self.series_names, # self.df_.columns, 948 index=self.output_dates_, 949 ) 950 951 if "return_pi" in kwargs: 952 953 self.lower_ = pd.DataFrame( 954 np.asarray(lower_pi_).reshape(h, self.n_series) 955 
+ y_means_[np.newaxis, :], 956 columns=self.series_names, # self.df_.columns, 957 index=self.output_dates_, 958 ) 959 960 self.upper_ = pd.DataFrame( 961 np.asarray(upper_pi_).reshape(h, self.n_series) 962 + y_means_[np.newaxis, :], 963 columns=self.series_names, # self.df_.columns, 964 index=self.output_dates_, 965 ) 966 967 res = DescribeResult(self.mean_, self.lower_, self.upper_) 968 969 if self.xreg_ is not None: 970 if len(self.xreg_.shape) > 1: 971 res2 = mx.tuple_map( 972 res, 973 lambda x: mo.delete_last_columns( 974 x, num_columns=self.xreg_.shape[1] 975 ), 976 ) 977 else: 978 res2 = mx.tuple_map( 979 res, lambda x: mo.delete_last_columns(x, num_columns=1) 980 ) 981 return DescribeResult(res2[0], res2[1], res2[2]) 982 983 return res 984 985 if self.type_pi == "gaussian": 986 987 DescribeResult = namedtuple("DescribeResult", ("mean", "lower", "upper")) 988 989 self.mean_ = pd.DataFrame( 990 np.asarray(self.mean_), 991 columns=self.series_names, # self.df_.columns, 992 index=self.output_dates_, 993 ) 994 995 self.lower_ = pd.DataFrame( 996 self.mean_.values - pi_multiplier * self.gaussian_preds_std_, 997 columns=self.series_names, # self.df_.columns, 998 index=self.output_dates_, 999 ) 1000 1001 self.upper_ = pd.DataFrame( 1002 self.mean_.values + pi_multiplier * self.gaussian_preds_std_, 1003 columns=self.series_names, # self.df_.columns, 1004 index=self.output_dates_, 1005 ) 1006 1007 res = DescribeResult(self.mean_, self.lower_, self.upper_) 1008 1009 if self.xreg_ is not None: 1010 if len(self.xreg_.shape) > 1: 1011 res2 = mx.tuple_map( 1012 res, 1013 lambda x: mo.delete_last_columns( 1014 x, num_columns=self.xreg_.shape[1] 1015 ), 1016 ) 1017 else: 1018 res2 = mx.tuple_map( 1019 res, lambda x: mo.delete_last_columns(x, num_columns=1) 1020 ) 1021 return DescribeResult(res2[0], res2[1], res2[2]) 1022 1023 return res 1024 1025 # After prediction loop, ensure sims only contain target columns 1026 if self.sims_ is not None: 1027 if self.verbose == 1: 1028 
self.sims_ = tuple( 1029 sim[:h,] # Only keep target columns and h rows 1030 for sim in tqdm(self.sims_) 1031 ) 1032 elif self.verbose == 0: 1033 self.sims_ = tuple( 1034 sim[:h,] # Only keep target columns and h rows 1035 for sim in self.sims_ 1036 ) 1037 1038 # Convert numpy arrays to DataFrames with proper columns 1039 self.sims_ = tuple( 1040 pd.DataFrame( 1041 sim, 1042 columns=self.df_.columns[: self.init_n_series_], 1043 index=self.output_dates_, 1044 ) 1045 for sim in self.sims_ 1046 ) 1047 1048 if self.type_pi in ("kde", "bootstrap", "block-bootstrap", "vine-copula"): 1049 if self.xreg_ is not None: 1050 # Use getsimsxreg when external regressors are present 1051 target_cols = self.df_.columns[: self.init_n_series_] 1052 self.sims_ = getsimsxreg(self.sims_, self.output_dates_, target_cols) 1053 else: 1054 # Use original getsims for backward compatibility 1055 self.sims_ = getsims(self.sims_) 1056 1057 def score(self, X, training_index, testing_index, scoring=None, **kwargs): 1058 """Train on training_index, score on testing_index.""" 1059 1060 assert ( 1061 bool(set(training_index).intersection(set(testing_index))) == False 1062 ), "Non-overlapping 'training_index' and 'testing_index' required" 1063 1064 # Dimensions 1065 try: 1066 # multivariate time series 1067 n, p = X.shape 1068 except: 1069 # univariate time series 1070 n = X.shape[0] 1071 p = 1 1072 1073 # Training and testing sets 1074 if p > 1: 1075 X_train = X[training_index, :] 1076 X_test = X[testing_index, :] 1077 else: 1078 X_train = X[training_index] 1079 X_test = X[testing_index] 1080 1081 # Horizon 1082 h = len(testing_index) 1083 assert ( 1084 len(training_index) + h 1085 ) <= n, "Please check lengths of training and testing windows" 1086 1087 # Fit and predict 1088 self.fit(X_train, **kwargs) 1089 preds = self.predict(h=h, **kwargs) 1090 1091 if scoring is None: 1092 scoring = "neg_root_mean_squared_error" 1093 1094 # check inputs 1095 assert scoring in ( 1096 "explained_variance", 1097 
"neg_mean_absolute_error", 1098 "neg_mean_squared_error", 1099 "neg_root_mean_squared_error", 1100 "neg_mean_squared_log_error", 1101 "neg_median_absolute_error", 1102 "r2", 1103 ), "'scoring' should be in ('explained_variance', 'neg_mean_absolute_error', \ 1104 'neg_mean_squared_error', 'neg_root_mean_squared_error', 'neg_mean_squared_log_error', \ 1105 'neg_median_absolute_error', 'r2')" 1106 1107 scoring_options = { 1108 "explained_variance": skm2.explained_variance_score, 1109 "neg_mean_absolute_error": skm2.mean_absolute_error, 1110 "neg_mean_squared_error": lambda x, y: np.mean((x - y) ** 2), 1111 "neg_root_mean_squared_error": lambda x, y: np.sqrt(np.mean((x - y) ** 2)), 1112 "neg_mean_squared_log_error": skm2.mean_squared_log_error, 1113 "neg_median_absolute_error": skm2.median_absolute_error, 1114 "r2": skm2.r2_score, 1115 } 1116 1117 return scoring_options[scoring](X_test, preds) 1118 1119 def plot(self, series=None, type_axis="dates", type_plot="pi"): 1120 """Plot time series forecast 1121 1122 Parameters: 1123 1124 series: {integer} or {string} 1125 series index or name 1126 1127 """ 1128 1129 assert all( 1130 [ 1131 self.mean_ is not None, 1132 self.lower_ is not None, 1133 self.upper_ is not None, 1134 self.output_dates_ is not None, 1135 ] 1136 ), "model forecasting must be obtained first (with predict)" 1137 1138 if series is None: 1139 # assert ( 1140 # self.init_n_series_ == 1 1141 # ), "please specify series index or name (n_series > 1)" 1142 series = 0 1143 1144 if isinstance(series, str): 1145 assert ( 1146 series in self.series_names 1147 ), f"series {series} doesn't exist in the input dataset" 1148 series_idx = self.df_.columns.get_loc(series) 1149 else: 1150 assert isinstance(series, int) and ( 1151 0 <= series < self.n_series 1152 ), f"check series index (< {self.n_series})" 1153 series_idx = series 1154 1155 y_all = list(self.df_.iloc[:, series_idx]) + list( 1156 self.mean_.iloc[:, series_idx] 1157 ) 1158 y_test = list(self.mean_.iloc[:, 
series_idx]) 1159 n_points_all = len(y_all) 1160 n_points_train = self.df_.shape[0] 1161 1162 if type_axis == "numeric": 1163 x_all = [i for i in range(n_points_all)] 1164 x_test = [i for i in range(n_points_train, n_points_all)] 1165 1166 if type_axis == "dates": # use dates 1167 x_all = np.concatenate( 1168 (self.input_dates.values, self.output_dates_.values), axis=None 1169 ) 1170 x_test = self.output_dates_.values 1171 1172 if type_plot == "pi": 1173 fig, ax = plt.subplots() 1174 ax.plot(x_all, y_all, "-") 1175 ax.plot(x_test, y_test, "-", color="orange") 1176 ax.fill_between( 1177 x_test, 1178 self.lower_.iloc[:, series_idx], 1179 self.upper_.iloc[:, series_idx], 1180 alpha=0.2, 1181 color="orange", 1182 ) 1183 if self.replications is None: 1184 if self.n_series > 1: 1185 plt.title( 1186 f"prediction intervals for {series}", 1187 loc="left", 1188 fontsize=12, 1189 fontweight=0, 1190 color="black", 1191 ) 1192 else: 1193 plt.title( 1194 f"prediction intervals for input time series", 1195 loc="left", 1196 fontsize=12, 1197 fontweight=0, 1198 color="black", 1199 ) 1200 plt.show() 1201 else: # self.replications is not None 1202 if self.n_series > 1: 1203 plt.title( 1204 f"prediction intervals for {self.replications} simulations of {series}", 1205 loc="left", 1206 fontsize=12, 1207 fontweight=0, 1208 color="black", 1209 ) 1210 else: 1211 plt.title( 1212 f"prediction intervals for {self.replications} simulations of input time series", 1213 loc="left", 1214 fontsize=12, 1215 fontweight=0, 1216 color="black", 1217 ) 1218 plt.show() 1219 1220 if type_plot == "spaghetti": 1221 palette = plt.get_cmap("Set1") 1222 sims_ix = getsims(self.sims_, series_idx) 1223 plt.plot(x_all, y_all, "-") 1224 for col_ix in range( 1225 sims_ix.shape[1] 1226 ): # avoid this when there are thousands of simulations 1227 plt.plot( 1228 x_test, 1229 sims_ix[:, col_ix], 1230 "-", 1231 color=palette(col_ix), 1232 linewidth=1, 1233 alpha=0.9, 1234 ) 1235 plt.plot(x_all, y_all, "-", color="black") 
1236 plt.plot(x_test, y_test, "-", color="blue") 1237 # Add titles 1238 if self.n_series > 1: 1239 plt.title( 1240 f"{self.replications} simulations of {series}", 1241 loc="left", 1242 fontsize=12, 1243 fontweight=0, 1244 color="black", 1245 ) 1246 else: 1247 plt.title( 1248 f"{self.replications} simulations of input time series", 1249 loc="left", 1250 fontsize=12, 1251 fontweight=0, 1252 color="black", 1253 ) 1254 plt.xlabel("Time") 1255 plt.ylabel("Values") 1256 # Show the graph 1257 plt.show() 1258 1259 def cross_val_score( 1260 self, 1261 X, 1262 scoring="root_mean_squared_error", 1263 n_jobs=None, 1264 verbose=0, 1265 xreg=None, 1266 initial_window=5, 1267 horizon=3, 1268 fixed_window=False, 1269 show_progress=True, 1270 level=95, 1271 **kwargs, 1272 ): 1273 """Evaluate a score by time series cross-validation. 1274 1275 Parameters: 1276 1277 X: {array-like, sparse matrix} of shape (n_samples, n_features) 1278 The data to fit. 1279 1280 scoring: str or a function 1281 A str in ('root_mean_squared_error', 'mean_squared_error', 'mean_error', 1282 'mean_absolute_error', 'mean_error', 'mean_percentage_error', 1283 'mean_absolute_percentage_error', 'winkler_score', 'coverage') 1284 Or a function defined as 'coverage' and 'winkler_score' in `utils.timeseries` 1285 1286 n_jobs: int, default=None 1287 Number of jobs to run in parallel. 1288 1289 verbose: int, default=0 1290 The verbosity level. 1291 1292 xreg: array-like, optional (default=None) 1293 Additional (external) regressors to be passed to `fit` 1294 xreg must be in 'increasing' order (most recent observations last) 1295 1296 initial_window: int 1297 initial number of consecutive values in each training set sample 1298 1299 horizon: int 1300 number of consecutive values in test set sample 1301 1302 fixed_window: boolean 1303 if False, all training samples start at index 0, and the training 1304 window's size is increasing. 
1305 if True, the training window's size is fixed, and the window is 1306 rolling forward 1307 1308 show_progress: boolean 1309 if True, a progress bar is printed 1310 1311 **kwargs: dict 1312 additional parameters to be passed to `fit` and `predict` 1313 1314 Returns: 1315 1316 A tuple: descriptive statistics or errors and raw errors 1317 1318 """ 1319 tscv = TimeSeriesSplit() 1320 1321 tscv_obj = tscv.split( 1322 X, 1323 initial_window=initial_window, 1324 horizon=horizon, 1325 fixed_window=fixed_window, 1326 ) 1327 1328 if isinstance(scoring, str): 1329 1330 assert scoring in ( 1331 "root_mean_squared_error", 1332 "mean_squared_error", 1333 "mean_error", 1334 "mean_absolute_error", 1335 "mean_percentage_error", 1336 "mean_absolute_percentage_error", 1337 "winkler_score", 1338 "coverage", 1339 ), "must have scoring in ('root_mean_squared_error', 'mean_squared_error', 'mean_error', 'mean_absolute_error', 'mean_error', 'mean_percentage_error', 'mean_absolute_percentage_error', 'winkler_score', 'coverage')" 1340 1341 def err_func(X_test, X_pred, scoring): 1342 if (self.replications is not None) or ( 1343 self.type_pi == "gaussian" 1344 ): # probabilistic 1345 if scoring == "winkler_score": 1346 return winkler_score(X_pred, X_test, level=level) 1347 elif scoring == "coverage": 1348 return coverage(X_pred, X_test, level=level) 1349 else: 1350 return mean_errors( 1351 pred=X_pred.mean, actual=X_test, scoring=scoring 1352 ) 1353 else: # not probabilistic 1354 return mean_errors(pred=X_pred, actual=X_test, scoring=scoring) 1355 1356 else: # isinstance(scoring, str) = False 1357 1358 err_func = scoring 1359 1360 errors = [] 1361 1362 train_indices = [] 1363 1364 test_indices = [] 1365 1366 for train_index, test_index in tscv_obj: 1367 train_indices.append(train_index) 1368 test_indices.append(test_index) 1369 1370 if show_progress is True: 1371 iterator = tqdm(zip(train_indices, test_indices), total=len(train_indices)) 1372 else: 1373 iterator = zip(train_indices, 
test_indices) 1374 1375 for train_index, test_index in iterator: 1376 1377 if verbose == 1: 1378 print(f"TRAIN: {train_index}") 1379 print(f"TEST: {test_index}") 1380 1381 if isinstance(X, pd.DataFrame): 1382 self.fit(X.iloc[train_index, :], xreg=xreg, **kwargs) 1383 X_test = X.iloc[test_index, :] 1384 else: 1385 self.fit(X[train_index, :], xreg=xreg, **kwargs) 1386 X_test = X[test_index, :] 1387 X_pred = self.predict(h=int(len(test_index)), level=level, **kwargs) 1388 1389 errors.append(err_func(X_test, X_pred, scoring)) 1390 1391 res = np.asarray(errors) 1392 1393 return res, describe(res) 1394 1395 def _compute_information_criterion(self, curr_lags, criterion="AIC"): 1396 """Compute information criterion using existing residuals 1397 1398 Parameters 1399 ---------- 1400 curr_lags : int 1401 Current number of lags being evaluated 1402 criterion : str 1403 One of 'AIC', 'AICc', or 'BIC' 1404 1405 Returns 1406 ------- 1407 float 1408 Information criterion value or inf if parameters exceed observations 1409 """ 1410 # Get dimensions 1411 n_obs = self.residuals_.shape[0] 1412 n_features = int(self.init_n_series_ * curr_lags) 1413 n_hidden = int(self.n_hidden_features) 1414 1415 # Calculate number of parameters 1416 term1 = int(n_features * n_hidden) 1417 term2 = int(n_hidden * self.init_n_series_) 1418 n_params = term1 + term2 1419 1420 # Check if we have enough observations for the number of parameters 1421 if n_obs <= n_params + 1: 1422 return float("inf") # Return infinity if too many parameters 1423 1424 # Compute RSS using existing residuals 1425 rss = np.sum(self.residuals_**2) 1426 1427 # Compute criterion 1428 if criterion == "AIC": 1429 ic = n_obs * np.log(rss / n_obs) + 2 * n_params 1430 elif criterion == "AICc": 1431 ic = n_obs * np.log(rss / n_obs) + 2 * n_params * ( 1432 n_obs / (n_obs - n_params - 1) 1433 ) 1434 else: # BIC 1435 ic = n_obs * np.log(rss / n_obs) + n_params * np.log(n_obs) 1436 1437 return ic
Univariate and multivariate time series (MTS) forecasting with Quasi-Randomized networks
Parameters:
obj: object.
any object containing a method fit (obj.fit()) and a method predict
(obj.predict()).
n_hidden_features: int.
number of nodes in the hidden layer.
activation_name: str.
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'.
a: float.
hyperparameter for 'prelu' or 'elu' activation function.
nodes_sim: str.
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'.
bias: boolean.
indicates if the hidden layer contains a bias term (True) or not
(False).
dropout: float.
regularization parameter; (random) percentage of nodes dropped out
of the training.
direct_link: boolean.
indicates if the original predictors are included (True) in model's fitting or not (False).
n_clusters: int.
number of clusters for 'kmeans' or 'gmm' clustering (could be 0: no clustering).
cluster_encode: bool.
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding.
type_clust: str.
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm').
type_scaling: a tuple of 3 strings.
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax').
lags: int or str.
number of lags used for each time series.
If string, lags must be one of 'AIC', 'AICc', or 'BIC'.
type_pi: str.
type of prediction interval; currently:
- "gaussian": simple, fast, but: assumes stationarity of Gaussian in-sample residuals and independence in the multivariate case
- "kde": based on Kernel Density Estimation of in-sample residuals
- "bootstrap": based on independent bootstrap of in-sample residuals
- "block-bootstrap": based on basic block bootstrap of in-sample residuals
- "scp-kde": Sequential split conformal prediction with Kernel Density Estimation of calibrated residuals
- "scp-bootstrap": Sequential split conformal prediction with independent bootstrap of calibrated residuals
- "scp-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of calibrated residuals
- "scp2-kde": Sequential split conformal prediction with Kernel Density Estimation of standardized calibrated residuals
- "scp2-bootstrap": Sequential split conformal prediction with independent bootstrap of standardized calibrated residuals
- "scp2-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of standardized calibrated residuals
- based on copulas of in-sample residuals: 'vine-tll', 'vine-bb1', 'vine-bb6', 'vine-bb7', 'vine-bb8', 'vine-clayton',
'vine-frank', 'vine-gaussian', 'vine-gumbel', 'vine-indep', 'vine-joe', 'vine-student'
- 'scp-vine-tll', 'scp-vine-bb1', 'scp-vine-bb6', 'scp-vine-bb7', 'scp-vine-bb8', 'scp-vine-clayton',
'scp-vine-frank', 'scp-vine-gaussian', 'scp-vine-gumbel', 'scp-vine-indep', 'scp-vine-joe', 'scp-vine-student'
- 'scp2-vine-tll', 'scp2-vine-bb1', 'scp2-vine-bb6', 'scp2-vine-bb7', 'scp2-vine-bb8', 'scp2-vine-clayton',
'scp2-vine-frank', 'scp2-vine-gaussian', 'scp2-vine-gumbel', 'scp2-vine-indep', 'scp2-vine-joe', 'scp2-vine-student'
block_size: int.
size of block for 'type_pi' in ("block-bootstrap", "scp-block-bootstrap", "scp2-block-bootstrap").
Default is ceil(3.15 * n_residuals^(1/3))
replications: int.
number of replications (if needed, for predictive simulation). Default is 'None'.
kernel: str.
the kernel to use for residuals density estimation (used for predictive simulation). Currently, either 'gaussian' or 'tophat'.
agg: str.
either "mean" or "median" for simulation of bootstrap aggregating
seed: int.
reproducibility seed for nodes_sim=='uniform' or predictive simulation.
backend: str.
"cpu" or "gpu" or "tpu".
verbose: int.
0: not printing; 1: printing
show_progress: bool.
True: progress bar when fitting each series; False: no progress bar when fitting each series
Attributes:
fit_objs_: dict
objects adjusted to each individual time series
y_: {array-like}
MTS responses (most recent observations first)
X_: {array-like}
MTS lags
xreg_: {array-like}
external regressors
y_means_: dict
a dictionary of each series mean values
preds_: {array-like}
successive model predictions
preds_std_: {array-like}
standard deviation around the predictions for Bayesian base learners (`obj`)
gaussian_preds_std_: {array-like}
standard deviation around the predictions for `type_pi='gaussian'`
return_std_: boolean
return uncertainty or not (set in predict)
df_: data frame
the input data frame, in case a data.frame is provided to `fit`
n_obs_: int
number of time series observations (number of rows for multivariate)
level_: int
level of confidence for prediction intervals (default is 95)
residuals_: {array-like}
in-sample residuals (for `type_pi` not conformal prediction) or calibrated residuals
(for `type_pi` in conformal prediction)
residuals_sims_: tuple of {array-like}
simulations of in-sample residuals (for `type_pi` not conformal prediction) or
calibrated residuals (for `type_pi` in conformal prediction)
kde_: A scikit-learn object, see https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KernelDensity.html
residuals_std_dev_: residuals standard deviation
Examples:
Example 1:
import nnetsauce as ns
import numpy as np
from sklearn import linear_model
np.random.seed(123)
M = np.random.rand(10, 3)
M[:,0] = 10*M[:,0]
M[:,2] = 25*M[:,2]
print(M)
# Adjust Bayesian Ridge
regr4 = linear_model.BayesianRidge()
obj_MTS = ns.MTS(regr4, lags = 1, n_hidden_features=5)
obj_MTS.fit(M)
print(obj_MTS.predict())
# with credible intervals
print(obj_MTS.predict(return_std=True, level=80))
print(obj_MTS.predict(return_std=True, level=95))
Example 2:
import nnetsauce as ns
import numpy as np
import pandas as pd
from sklearn import linear_model
dataset = {
'date' : ['2001-01-01', '2002-01-01', '2003-01-01', '2004-01-01', '2005-01-01'],
'series1' : [34, 30, 35.6, 33.3, 38.1],
'series2' : [4, 5.5, 5.6, 6.3, 5.1],
'series3' : [100, 100.5, 100.6, 100.2, 100.1]}
df = pd.DataFrame(dataset).set_index('date')
print(df)
# Adjust Bayesian Ridge
regr5 = linear_model.BayesianRidge()
obj_MTS = ns.MTS(regr5, lags = 1, n_hidden_features=5)
obj_MTS.fit(df)
print(obj_MTS.predict())
# with credible intervals
print(obj_MTS.predict(return_std=True, level=80))
print(obj_MTS.predict(return_std=True, level=95))
def fit(self, X, xreg=None, **kwargs):
    """Fit MTS model to training data X, with optional regressors xreg

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Training time series, where n_samples is the number
            of samples and n_features is the number of features;
            X must be in increasing order (most recent observations last)

        xreg: {array-like}, shape = [n_samples, n_features_xreg]
            Additional (external) regressors to be passed to self.obj
            xreg must be in 'increasing' order (most recent observations last)

        **kwargs: for now, additional parameters to be passed to kernel
            density estimation, when needed (see sklearn.neighbors.KernelDensity)

    Returns:

        self: object
    """

    # NOTE(review): requires a 2-D input (X.shape[1]); the univariate
    # branches further down are only reached with a single-column 2-D X.
    self.init_n_series_ = X.shape[1]

    # Automatic lag selection if requested (self.lags is 'AIC', 'AICc' or 'BIC')
    if isinstance(self.lags, str):
        max_lags = min(25, X.shape[0] // 4)
        best_ic = float("inf")
        best_lags = 1

        if self.verbose:
            print(f"\nSelecting optimal number of lags using {self.lags}...")
            iterator = tqdm(range(1, max_lags + 1))
        else:
            iterator = range(1, max_lags + 1)

        # Loop-invariant: reverse the series once (most recent observation
        # first), converting a DataFrame to a numpy array beforehand.
        if isinstance(X, pd.DataFrame):
            X_values = X.values[::-1]
        else:
            X_values = X[::-1]
        if self.init_n_series_ <= 1:
            X_values = X_values.reshape(-1, 1)

        for lag in iterator:
            # Try current lag value
            mts_input = ts.create_train_inputs(X_values, lag)

            # Cook training set and fit model
            dummy_y, scaled_Z = self.cook_training_set(
                y=np.ones(mts_input[0].shape[0]), X=mts_input[1]
            )
            residuals_ = []

            for i in range(self.init_n_series_):
                y_mean = np.mean(mts_input[0][:, i])
                centered_y_i = mts_input[0][:, i] - y_mean
                self.obj.fit(X=scaled_Z, y=centered_y_i)
                residuals_.append(
                    (centered_y_i - self.obj.predict(scaled_Z)).tolist()
                )

            # _compute_information_criterion reads self.residuals_
            self.residuals_ = np.asarray(residuals_).T
            ic = self._compute_information_criterion(
                curr_lags=lag, criterion=self.lags
            )

            if self.verbose:
                print(f"Trying lags={lag}, {self.lags}={ic:.2f}")

            if ic < best_ic:
                best_ic = ic
                best_lags = lag

        if self.verbose:
            print(f"\nSelected {best_lags} lags with {self.lags}={best_ic:.2f}")

        # The criterion name is replaced by the selected integer, so a later
        # call to fit on the same object does not redo the selection.
        self.lags = best_lags

    self.input_dates = None
    self.df_ = None

    if not isinstance(X, pd.DataFrame):
        # input data set is a numpy array
        if xreg is None:
            X = pd.DataFrame(X)
            self.series_names = ["series" + str(i) for i in range(X.shape[1])]
        else:
            # xreg is not None
            # NOTE(review): self.series_names is not (re)assigned on this
            # path although it is read below -- confirm it is initialised
            # elsewhere (e.g. in __init__).
            X = mo.cbind(X, xreg)
            self.xreg_ = xreg

    else:  # input data set is a DataFrame with column names

        X_index = None
        if X.index is not None:
            X_index = X.index
        if xreg is None:
            X = copy.deepcopy(mo.convert_df_to_numeric(X))
        else:
            X = copy.deepcopy(mo.cbind(mo.convert_df_to_numeric(X), xreg))
            self.xreg_ = xreg
        if X_index is not None:
            X.index = X_index
        self.series_names = X.columns.tolist()

    # self.df_ was reset to None a few lines above, so the two
    # `else` (append to existing data) branches below are currently never taken.
    if isinstance(X, pd.DataFrame):
        if self.df_ is None:
            self.df_ = X
            X = X.values
        else:
            input_dates_prev = pd.DatetimeIndex(self.df_.index.values)
            frequency = pd.infer_freq(input_dates_prev)
            self.df_ = pd.concat([self.df_, X], axis=0)
            self.input_dates = pd.date_range(
                start=input_dates_prev[0],
                periods=len(input_dates_prev) + X.shape[0],
                freq=frequency,
            ).values.tolist()
            self.df_.index = self.input_dates
            X = self.df_.values
        self.df_.columns = self.series_names
    else:
        if self.df_ is None:
            self.df_ = pd.DataFrame(X, columns=self.series_names)
        else:
            self.df_ = pd.concat(
                [self.df_, pd.DataFrame(X, columns=self.series_names)],
                axis=0,
            )

    self.input_dates = ts.compute_input_dates(self.df_)

    try:
        # multivariate time series
        n, p = X.shape
    except ValueError:
        # univariate time series (shape does not unpack into two values)
        n = X.shape[0]
        p = 1
    self.n_obs_ = n

    rep_1_n = np.repeat(1, n)

    # Reset everything a previous fit may have left behind
    self.y_ = None
    self.X_ = None
    self.n_series = p
    self.fit_objs_.clear()
    self.y_means_.clear()
    residuals_ = []
    self.residuals_ = None
    self.residuals_sims_ = None
    self.kde_ = None
    self.sims_ = None
    self.scaled_Z_ = None
    self.centered_y_is_ = []

    # Inputs are reversed: most recent observations come first
    if self.init_n_series_ > 1:
        # multivariate time series
        mts_input = ts.create_train_inputs(X[::-1], self.lags)
    else:
        # univariate time series
        mts_input = ts.create_train_inputs(X.reshape(-1, 1)[::-1], self.lags)

    self.y_ = mts_input[0]

    self.X_ = mts_input[1]

    dummy_y, scaled_Z = self.cook_training_set(y=rep_1_n, X=self.X_)

    self.scaled_Z_ = scaled_Z

    # loop on all the time series and adjust self.obj.fit
    if self.verbose > 0:
        print(
            f"\n Adjusting {type(self.obj).__name__} to multivariate time series... \n"
        )

    if self.show_progress is True:
        iterator = tqdm(range(self.init_n_series_))
    else:
        iterator = range(self.init_n_series_)

    if self.type_pi in (
        "gaussian",
        "kde",
        "bootstrap",
        "block-bootstrap",
    ) or self.type_pi.startswith("vine"):
        # One centered fit per series on the whole sample; in-sample residuals
        for i in iterator:
            y_mean = np.mean(self.y_[:, i])
            self.y_means_[i] = y_mean
            centered_y_i = self.y_[:, i] - y_mean
            self.centered_y_is_.append(centered_y_i)
            self.obj.fit(X=scaled_Z, y=centered_y_i)
            self.fit_objs_[i] = deepcopy(self.obj)
            residuals_.append(
                (centered_y_i - self.fit_objs_[i].predict(scaled_Z)).tolist()
            )

    if self.type_pi.startswith("scp"):
        # split conformal prediction
        for i in iterator:
            n_y = self.y_.shape[0]
            n_y_half = n_y // 2
            first_half_idx = range(0, n_y_half)
            second_half_idx = range(n_y_half, n_y)
            # fit on the first half ...
            y_mean_temp = np.mean(self.y_[first_half_idx, i])
            centered_y_i_temp = self.y_[first_half_idx, i] - y_mean_temp
            self.obj.fit(X=scaled_Z[first_half_idx, :], y=centered_y_i_temp)
            # ... and compute (calibrated) residuals on the second half
            residuals_.append(
                (
                    self.y_[second_half_idx, i]
                    - (y_mean_temp + self.obj.predict(scaled_Z[second_half_idx, :]))
                ).tolist()
            )
            # the model kept for forecasting is fit on the second half
            y_mean = np.mean(self.y_[second_half_idx, i])
            self.y_means_[i] = y_mean
            centered_y_i = self.y_[second_half_idx, i] - y_mean
            self.obj.fit(X=scaled_Z[second_half_idx, :], y=centered_y_i)
            self.fit_objs_[i] = deepcopy(self.obj)

    self.residuals_ = np.asarray(residuals_).T

    if self.type_pi == "gaussian":
        self.gaussian_preds_std_ = np.std(self.residuals_, axis=0)

    if self.type_pi.startswith("scp2"):
        # Calculate mean and standard deviation for each column
        data_mean = np.mean(self.residuals_, axis=0)
        self.residuals_std_dev_ = np.std(self.residuals_, axis=0)
        # Center and scale the array using broadcasting
        self.residuals_ = (
            self.residuals_ - data_mean[np.newaxis, :]
        ) / self.residuals_std_dev_[np.newaxis, :]

    if self.replications is not None and "kde" in self.type_pi:
        if self.verbose > 0:
            print(f"\n Simulate residuals using {self.kernel} kernel... \n")
        assert self.kernel in (
            "gaussian",
            "tophat",
        ), "currently, 'kernel' must be either 'gaussian' or 'tophat'"
        # Bandwidth chosen by grid search over a log-spaced grid
        kernel_bandwidths = {"bandwidth": np.logspace(-6, 6, 150)}
        grid = GridSearchCV(
            KernelDensity(kernel=self.kernel, **kwargs),
            param_grid=kernel_bandwidths,
        )
        grid.fit(self.residuals_)

        if self.verbose > 0:
            print(
                f"\n Best parameters for {self.kernel} kernel: {grid.best_params_} \n"
            )

        self.kde_ = grid.best_estimator_

    return self
Fit MTS model to training data X, with optional regressors xreg
Parameters:
X: {array-like}, shape = [n_samples, n_features] Training time series, where n_samples is the number of samples and n_features is the number of features; X must be in increasing order (most recent observations last)
xreg: {array-like}, shape = [n_samples, n_features_xreg] Additional (external) regressors to be passed to self.obj xreg must be in 'increasing' order (most recent observations last)
**kwargs: for now, additional parameters to be passed to kernel density estimation, when needed (see sklearn.neighbors.KernelDensity)
Returns:
self: object
def predict(self, h=5, level=95, **kwargs):
    """Forecast all the time series, h steps ahead

    Parameters:

        h: int
            forecasting horizon (number of steps ahead)

        level: int
            level of confidence for prediction intervals (default is 95)

        **kwargs: dict
            the presence of the keys 'return_std' (Bayesian base learners),
            'return_pi' (split conformal, without simulation) and 'xreg'
            (future values of external regressors, indexed with `.iloc`)
            is inspected; all keyword arguments are forwarded to
            `cook_test_set` and, with 'return_pi', to the base learners'
            `predict`

    Returns:

        either a data frame of point forecasts, or a namedtuple
        (mean, lower, upper) or (mean, sims, lower, upper), depending on
        `type_pi`, `replications` and the keyword arguments

    """

    def _with_progress(iterable):
        # Progress bar only when verbose == 1; any other verbosity level
        # iterates silently (instead of skipping the computation).
        return tqdm(iterable) if self.verbose == 1 else iterable

    self.output_dates_, frequency = ts.compute_output_dates(self.df_, h)

    self.level_ = level

    self.return_std_ = False  # do not remove (/!\)

    self.mean_ = None  # do not remove (/!\)

    self.mean_ = deepcopy(self.y_)  # do not remove (/!\)

    self.lower_ = None  # do not remove (/!\)

    self.upper_ = None  # do not remove (/!\)

    self.sims_ = None  # do not remove (/!\)

    y_means_ = np.asarray([self.y_means_[i] for i in range(self.init_n_series_)])

    self.alpha_ = 100 - level

    # two-sided Gaussian quantile for the requested level
    pi_multiplier = norm.ppf(1 - self.alpha_ / 200)

    if "return_std" in kwargs:  # bayesian forecasting
        self.return_std_ = True
        self.preds_std_ = []
        DescribeResult = namedtuple(
            "DescribeResult", ("mean", "lower", "upper")
        )  # to be updated

    if "return_pi" in kwargs:  # split conformal, without simulation
        mean_pi_ = []
        lower_pi_ = []
        upper_pi_ = []
        DescribeResult = namedtuple(
            "DescribeResult", ("mean", "lower", "upper")
        )  # to be updated

    # ---- simulate future residuals (one draw of h rows per replication) ----
    # Each replication uses its own seed: self.seed + 100 * i

    if self.kde_ is not None and "kde" in self.type_pi:  # kde
        target_cols = self.df_.columns[
            : self.init_n_series_
        ]  # Get target column names
        self.residuals_sims_ = tuple(
            self.kde_.sample(
                n_samples=h, random_state=self.seed + 100 * i
            )  # Keep full sample
            for i in _with_progress(range(self.replications))
        )

        # Convert to DataFrames after sampling
        self.residuals_sims_ = tuple(
            pd.DataFrame(
                sim,  # Keep all columns
                columns=target_cols,  # Use original target column names
                index=self.output_dates_,
            )
            for sim in self.residuals_sims_
        )

    if self.type_pi in ("bootstrap", "scp-bootstrap", "scp2-bootstrap"):
        assert self.replications is not None and isinstance(
            self.replications, int
        ), "'replications' must be provided and be an integer"
        self.residuals_sims_ = tuple(
            ts.bootstrap(
                self.residuals_,
                h=h,
                block_size=None,
                seed=self.seed + 100 * i,
            )
            for i in _with_progress(range(self.replications))
        )

    if self.type_pi in (
        "block-bootstrap",
        "scp-block-bootstrap",
        "scp2-block-bootstrap",
    ):
        if self.block_size is None:
            self.block_size = int(
                np.ceil(3.15 * (self.residuals_.shape[0] ** (1 / 3)))
            )

        assert self.replications is not None and isinstance(
            self.replications, int
        ), "'replications' must be provided and be an integer"
        self.residuals_sims_ = tuple(
            ts.bootstrap(
                self.residuals_,
                h=h,
                block_size=self.block_size,
                seed=self.seed + 100 * i,
            )
            for i in _with_progress(range(self.replications))
        )

    if "vine" in self.type_pi:
        self.residuals_sims_ = tuple(
            vinecopula_sample(
                x=self.residuals_,
                n_samples=h,
                method=self.type_pi,
                random_state=self.seed + 100 * i,
            )
            for i in _with_progress(range(self.replications))
        )

    # ---- recursive point forecasts ----
    # `mean_` holds the most recent observations first; each step prepends
    # the new forecast row and drops the oldest one.
    mean_ = deepcopy(self.mean_)

    for step in range(h):

        new_obs = ts.reformat_response(mean_, self.lags)
        new_X = new_obs.reshape(1, -1)
        cooked_new_X = self.cook_test_set(new_X, **kwargs)

        if "return_std" in kwargs:
            self.preds_std_.append(
                [
                    np.asarray(
                        self.fit_objs_[j].predict(cooked_new_X, return_std=True)[1]
                    ).item()
                    for j in range(self.n_series)
                ]
            )

        if "return_pi" in kwargs:
            # A dedicated index `j` is used here: reusing the horizon index
            # would clobber it and select the wrong `xreg` row below.
            for j in range(self.n_series):
                preds_pi = self.fit_objs_[j].predict(cooked_new_X, **kwargs)
                mean_pi_.append(preds_pi.mean[0])
                lower_pi_.append(preds_pi.lower[0])
                upper_pi_.append(preds_pi.upper[0])

        predicted_cooked_new_X = np.asarray(
            [
                np.asarray(self.fit_objs_[j].predict(cooked_new_X)).item()
                for j in range(self.init_n_series_)
            ]
        )

        # models were fit on centered responses: add the means back
        preds = np.asarray(y_means_ + predicted_cooked_new_X)

        # Create full row with both predictions and external regressors
        if self.xreg_ is not None and "xreg" in kwargs:
            next_xreg = kwargs["xreg"].iloc[step : step + 1].values.flatten()
            full_row = np.concatenate([preds, next_xreg])
        else:
            full_row = preds

        # Create a new row with same number of columns as mean_
        new_row = np.zeros((1, mean_.shape[1]))
        new_row[0, : full_row.shape[0]] = full_row

        # Maintain the full dimensionality by using vstack instead of rbind
        mean_ = np.vstack([new_row, mean_[:-1]])

    # Final output should only include the target columns
    # (the first h rows are the forecasts, newest first; reverse them)
    self.mean_ = pd.DataFrame(
        mean_[0:h, : self.init_n_series_][::-1],
        columns=self.df_.columns[: self.init_n_series_],
        index=self.output_dates_,
    )

    # function's return ----------------------------------------------------------------------
    if (
        (("return_std" not in kwargs) and ("return_pi" not in kwargs))
        and (self.type_pi not in ("gaussian", "scp"))
    ) or ("vine" in self.type_pi):

        if self.replications is None:
            return self.mean_.iloc[:, : self.init_n_series_]

        # if "return_std" not in kwargs and self.replications is not None
        meanf = []
        medianf = []
        lower = []
        upper = []

        if "scp2" in self.type_pi:
            # residuals were standardized in fit: scale them back
            self.sims_ = tuple(
                (
                    self.mean_
                    + self.residuals_sims_[i]
                    * self.residuals_std_dev_[np.newaxis, :]
                    for i in _with_progress(range(self.replications))
                )
            )
        else:
            self.sims_ = tuple(
                (
                    self.mean_ + self.residuals_sims_[i]
                    for i in _with_progress(range(self.replications))
                )
            )

        DescribeResult = namedtuple(
            "DescribeResult", ("mean", "sims", "lower", "upper")
        )
        for ix in range(self.init_n_series_):
            sims_ix = getsims(self.sims_, ix)
            if self.agg == "mean":
                meanf.append(np.mean(sims_ix, axis=1))
            else:
                medianf.append(np.median(sims_ix, axis=1))
            lower.append(np.quantile(sims_ix, q=self.alpha_ / 200, axis=1))
            upper.append(np.quantile(sims_ix, q=1 - self.alpha_ / 200, axis=1))

        # NOTE(review): when self.agg != "mean", `meanf` is empty here and
        # building this data frame is expected to fail -- confirm the
        # intended behaviour for agg="median".
        self.mean_ = pd.DataFrame(
            np.asarray(meanf).T,
            columns=self.series_names[: self.init_n_series_],  # self.df_.columns,
            index=self.output_dates_,
        )

        self.lower_ = pd.DataFrame(
            np.asarray(lower).T,
            columns=self.series_names[: self.init_n_series_],  # self.df_.columns,
            index=self.output_dates_,
        )

        self.upper_ = pd.DataFrame(
            np.asarray(upper).T,
            columns=self.series_names[: self.init_n_series_],  # self.df_.columns,
            index=self.output_dates_,
        )

        # median_ is only available when medians were actually computed
        if medianf:
            self.median_ = pd.DataFrame(
                np.asarray(medianf).T,
                columns=self.series_names[
                    : self.init_n_series_
                ],  # self.df_.columns,
                index=self.output_dates_,
            )

        return DescribeResult(self.mean_, self.sims_, self.lower_, self.upper_)

    if (
        (("return_std" in kwargs) or ("return_pi" in kwargs))
        and (self.type_pi not in ("gaussian", "scp"))
    ) or "vine" in self.type_pi:
        DescribeResult = namedtuple("DescribeResult", ("mean", "lower", "upper"))

        self.mean_ = pd.DataFrame(
            np.asarray(self.mean_),
            columns=self.series_names,  # self.df_.columns,
            index=self.output_dates_,
        )

        if "return_std" in kwargs:

            self.preds_std_ = np.asarray(self.preds_std_)
            # diagnostic output, only when verbosity is requested
            if self.verbose > 0:
                print("self.preds_std_", self.preds_std_)
                print("self.mean_", self.mean_)
                print("pi_multiplier", pi_multiplier)

            self.lower_ = pd.DataFrame(
                self.mean_.values - pi_multiplier * self.preds_std_,
                columns=self.series_names,  # self.df_.columns,
                index=self.output_dates_,
            )

            self.upper_ = pd.DataFrame(
                self.mean_.values + pi_multiplier * self.preds_std_,
                columns=self.series_names,  # self.df_.columns,
                index=self.output_dates_,
            )

        if "return_pi" in kwargs:

            # bounds were collected step by step, series by series
            self.lower_ = pd.DataFrame(
                np.asarray(lower_pi_).reshape(h, self.n_series)
                + y_means_[np.newaxis, :],
                columns=self.series_names,  # self.df_.columns,
                index=self.output_dates_,
            )

            self.upper_ = pd.DataFrame(
                np.asarray(upper_pi_).reshape(h, self.n_series)
                + y_means_[np.newaxis, :],
                columns=self.series_names,  # self.df_.columns,
                index=self.output_dates_,
            )

        res = DescribeResult(self.mean_, self.lower_, self.upper_)

        if self.xreg_ is not None:
            # drop the trailing external-regressor columns from the output
            if len(self.xreg_.shape) > 1:
                res2 = mx.tuple_map(
                    res,
                    lambda x: mo.delete_last_columns(
                        x, num_columns=self.xreg_.shape[1]
                    ),
                )
            else:
                res2 = mx.tuple_map(
                    res, lambda x: mo.delete_last_columns(x, num_columns=1)
                )
            return DescribeResult(res2[0], res2[1], res2[2])

        return res

    if self.type_pi == "gaussian":

        DescribeResult = namedtuple("DescribeResult", ("mean", "lower", "upper"))

        self.mean_ = pd.DataFrame(
            np.asarray(self.mean_),
            columns=self.series_names,  # self.df_.columns,
            index=self.output_dates_,
        )

        self.lower_ = pd.DataFrame(
            self.mean_.values - pi_multiplier * self.gaussian_preds_std_,
            columns=self.series_names,  # self.df_.columns,
            index=self.output_dates_,
        )

        self.upper_ = pd.DataFrame(
            self.mean_.values + pi_multiplier * self.gaussian_preds_std_,
            columns=self.series_names,  # self.df_.columns,
            index=self.output_dates_,
        )

        res = DescribeResult(self.mean_, self.lower_, self.upper_)

        if self.xreg_ is not None:
            # drop the trailing external-regressor columns from the output
            if len(self.xreg_.shape) > 1:
                res2 = mx.tuple_map(
                    res,
                    lambda x: mo.delete_last_columns(
                        x, num_columns=self.xreg_.shape[1]
                    ),
                )
            else:
                res2 = mx.tuple_map(
                    res, lambda x: mo.delete_last_columns(x, num_columns=1)
                )
            return DescribeResult(res2[0], res2[1], res2[2])

        return res

    # NOTE(review): every branch above returns, so the code below is only
    # reached when none of the three conditions holds; nothing is returned
    # explicitly from here -- confirm this is intended.

    # After prediction loop, ensure sims only contain target columns
    if self.sims_ is not None:
        self.sims_ = tuple(
            sim[:h,]  # Only keep target columns and h rows
            for sim in _with_progress(self.sims_)
        )

        # Convert numpy arrays to DataFrames with proper columns
        self.sims_ = tuple(
            pd.DataFrame(
                sim,
                columns=self.df_.columns[: self.init_n_series_],
                index=self.output_dates_,
            )
            for sim in self.sims_
        )

    if self.type_pi in ("kde", "bootstrap", "block-bootstrap", "vine-copula"):
        if self.xreg_ is not None:
            # Use getsimsxreg when external regressors are present
            target_cols = self.df_.columns[: self.init_n_series_]
            self.sims_ = getsimsxreg(self.sims_, self.output_dates_, target_cols)
        else:
            # Use original getsims for backward compatibility
            self.sims_ = getsims(self.sims_)
Forecast all the time series, h steps ahead
def score(self, X, training_index, testing_index, scoring=None, **kwargs):
    """Train on training_index, score on testing_index.

    Parameters:

        X: {array-like}, shape = [n_samples] or [n_samples, n_series]
            Observed series; rows are selected positionally with
            `training_index` and `testing_index`.

        training_index: array-like of int
            Row positions used to fit the model.

        testing_index: array-like of int
            Row positions the forecast is compared against; its length is
            used as the forecasting horizon `h`.

        scoring: str
            One of 'explained_variance', 'neg_mean_absolute_error',
            'neg_mean_squared_error', 'neg_root_mean_squared_error',
            'neg_mean_squared_log_error', 'neg_median_absolute_error', 'r2'.
            Defaults to 'neg_root_mean_squared_error'.
            NOTE: despite the 'neg_' prefix, the error metrics are returned
            as computed (they are not negated here).

        **kwargs: additional parameters, forwarded to both `self.fit`
            and `self.predict`

    Returns:

        value of the selected metric, computed between `X[testing_index]`
        and the `h`-steps-ahead forecasts

    Raises:

        AssertionError: if the two index sets overlap, if `scoring` is not
            supported, or if the two windows are longer than the series.
    """

    supported_scorings = (
        "explained_variance",
        "neg_mean_absolute_error",
        "neg_mean_squared_error",
        "neg_root_mean_squared_error",
        "neg_mean_squared_log_error",
        "neg_median_absolute_error",
        "r2",
    )

    assert set(training_index).isdisjoint(
        testing_index
    ), "Non-overlapping 'training_index' and 'testing_index' required"

    if scoring is None:
        scoring = "neg_root_mean_squared_error"

    # Validate before fitting, so that a typo does not cost a full training run
    assert (
        scoring in supported_scorings
    ), f"'scoring' should be in {supported_scorings}"

    # Dimensions
    try:
        # multivariate time series
        n, p = X.shape
    except ValueError:
        # univariate time series (shape does not unpack into two values)
        n = X.shape[0]
        p = 1

    # Training and testing sets
    if p > 1:
        X_train = X[training_index, :]
        X_test = X[testing_index, :]
    else:
        X_train = X[training_index]
        X_test = X[testing_index]

    # Horizon
    h = len(testing_index)
    assert (
        len(training_index) + h
    ) <= n, "Please check lengths of training and testing windows"

    # Fit and predict
    self.fit(X_train, **kwargs)
    preds = self.predict(h=h, **kwargs)

    # Metrics implemented directly with NumPy
    if scoring == "neg_mean_squared_error":
        return np.mean((X_test - preds) ** 2)

    if scoring == "neg_root_mean_squared_error":
        return np.sqrt(np.mean((X_test - preds) ** 2))

    # Remaining metrics are looked up in the metrics module only when needed
    sklearn_metric_names = {
        "explained_variance": "explained_variance_score",
        "neg_mean_absolute_error": "mean_absolute_error",
        "neg_mean_squared_log_error": "mean_squared_log_error",
        "neg_median_absolute_error": "median_absolute_error",
        "r2": "r2_score",
    }

    return getattr(skm2, sklearn_metric_names[scoring])(X_test, preds)
Train on training_index, score on testing_index.
class MultitaskClassifier(Base, ClassifierMixin):
    """Multitask Classification model based on regression models, with shared covariates

    Parameters:

        obj: object
            any object (must be a regression model) containing a method fit (obj.fit())
            and a method predict (obj.predict())

        n_hidden_features: int
            number of nodes in the hidden layer

        activation_name: str
            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

        a: float
            hyperparameter for 'prelu' or 'elu' activation function

        nodes_sim: str
            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
            'uniform'

        bias: boolean
            indicates if the hidden layer contains a bias term (True) or not
            (False)

        dropout: float
            regularization parameter; (random) percentage of nodes dropped out
            of the training

        direct_link: boolean
            indicates if the original predictors are included (True) in model's
            fitting or not (False)

        n_clusters: int
            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
                no clustering)

        cluster_encode: bool
            defines how the variable containing clusters is treated (default is one-hot)
            if `False`, then labels are used, without one-hot encoding

        type_clust: str
            type of clustering method: currently k-means ('kmeans') or Gaussian
            Mixture Model ('gmm')

        type_scaling: a tuple of 3 strings
            scaling methods for inputs, hidden layer, and clustering respectively
            (and when relevant).
            Currently available: standardization ('std') or MinMax scaling ('minmax')

        col_sample: float
            percentage of covariates randomly chosen for training

        row_sample: float
            percentage of rows chosen for training, by stratified bootstrapping

        seed: int
            reproducibility seed for nodes_sim=='uniform'

        backend: str
            "cpu" or "gpu" or "tpu"

    Attributes:

        fit_objs_: dict
            fitted copies of `obj`, one per class (keyed by class position)

        n_classes_: int
            number of classes for the classifier

    Examples:

    See also [https://github.com/Techtonique/nnetsauce/blob/master/examples/mtask_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/mtask_classification.py)

    ```python
    import nnetsauce as ns
    import numpy as np
    from sklearn.datasets import load_breast_cancer
    from sklearn.linear_model import LinearRegression
    from sklearn.model_selection import train_test_split
    from sklearn import metrics
    from time import time

    breast_cancer = load_breast_cancer()
    Z = breast_cancer.data
    t = breast_cancer.target

    X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2,
                                                        random_state=123+2*10)

    # Linear Regression is used
    regr = LinearRegression()
    fit_obj = ns.MultitaskClassifier(regr, n_hidden_features=5,
                                  n_clusters=2, type_clust="gmm")

    start = time()
    fit_obj.fit(X_train, y_train)
    print(f"Elapsed {time() - start}")

    print(fit_obj.score(X_test, y_test))
    print(fit_obj.score(X_test, y_test, scoring="roc_auc"))

    start = time()
    preds = fit_obj.predict(X_test)
    print(f"Elapsed {time() - start}")
    print(metrics.classification_report(preds, y_test))
    ```

    """

    # construct the object -----

    def __init__(
        self,
        obj,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        col_sample=1,
        row_sample=1,
        seed=123,
        backend="cpu",
    ):
        super().__init__(
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            col_sample=col_sample,
            row_sample=row_sample,
            seed=seed,
            backend=backend,
        )

        self.type_fit = "classification"
        self.obj = obj
        self.fit_objs_ = {}

    def fit(self, X, y, sample_weight=None, **kwargs):
        """Fit MultitaskClassifier to training data (X, y).

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            sample_weight: accepted for API compatibility; not used by
                this method.

            **kwargs: additional parameters to be passed to
                    self.cook_training_set or self.obj.fit

        Returns:

            self: object

        """

        assert mx.is_factor(y), "y must contain only integers"

        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

        self.classes_ = np.unique(y)  # for compatibility with sklearn
        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn

        # multitask response: one indicator column per class
        Y = mo.one_hot_encode2(output_y, self.n_classes_)

        # one regression task per class; deepcopy freezes each fitted state,
        # since self.obj is refitted in place at every iteration
        for i in range(self.n_classes_):
            self.fit_objs_[i] = deepcopy(self.obj.fit(scaled_Z, Y[:, i], **kwargs))

        return self

    def predict(self, X, **kwargs):
        """Predict test data X.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Test vectors, where n_samples is the number
                of samples and n_features is the number of features.

            **kwargs: additional parameters to be passed to
                    self.cook_test_set

        Returns:

            model predictions: {array-like}
                position of the most probable class for each row of the
                output of `predict_proba`

        """

        return np.argmax(self.predict_proba(X, **kwargs), axis=1)

    def predict_proba(self, X, **kwargs):
        """Predict probabilities for test data X.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Test vectors, where n_samples is the number
                of samples and n_features is the number of features.
                A 1-D array is treated as a single observation.

            **kwargs: additional parameters to be passed to
                    self.cook_test_set

        Returns:

            probability estimates for test data: {array-like},
            one row per observation and one column per class

        """

        shape_X = X.shape

        if len(shape_X) == 1:
            # Single observation: exactly one row of probabilities
            # (shape_X[0] is the number of features here, not of samples)
            n_features = shape_X[0]

            new_X = mo.rbind(
                X.reshape(1, n_features),
                np.ones(n_features).reshape(1, n_features),
            )

            Z = self.cook_test_set(new_X, **kwargs)

            probs = np.zeros((1, self.n_classes_))

            # loop on all the classes; only the first row of Z is the
            # actual observation
            for i in range(self.n_classes_):
                probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)[0]

        else:
            Z = self.cook_test_set(X, **kwargs)

            probs = np.zeros((shape_X[0], self.n_classes_))

            # loop on all the classes
            for i in range(self.n_classes_):
                probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)

        # squash raw regression outputs to (0, 1), then normalize each row
        expit_raw_probs = expit(probs)

        return expit_raw_probs / expit_raw_probs.sum(axis=1)[:, None]
Multitask Classification model based on regression models, with shared covariates
Parameters:
obj: object
any object (must be a regression model) containing a method fit (obj.fit())
and a method predict (obj.predict())
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
col_sample: float
percentage of covariates randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
seed: int
reproducibility seed for nodes_sim=='uniform'
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
fit_objs_: dict
fitted copies of `obj`, one per class (one regression model is fitted for each class indicator)
n_classes_: int
number of classes for the classifier
Examples:
See also https://github.com/Techtonique/nnetsauce/blob/master/examples/mtask_classification.py
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time
breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target
X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2,
random_state=123+2*10)
# Linear Regression is used
regr = LinearRegression()
fit_obj = ns.MultitaskClassifier(regr, n_hidden_features=5,
n_clusters=2, type_clust="gmm")
start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")
print(fit_obj.score(X_test, y_test))
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
start = time()
preds = fit_obj.predict(X_test)
print(f"Elapsed {time() - start}")
print(metrics.classification_report(preds, y_test))
def fit(self, X, y, sample_weight=None, **kwargs):
    """Fit MultitaskClassifier to training data (X, y).

    One copy of the base regression model is fitted per class, each on the
    indicator column of that class.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features.

        y: array-like, shape = [n_samples]
            Target values.

        sample_weight: accepted but not used by this method.

        **kwargs: additional parameters to be passed to
                self.cook_training_set or self.obj.fit

    Returns:

        self: object

    """

    assert mx.is_factor(y), "y must contain only integers"

    output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

    # for compatibility with sklearn
    labels = np.unique(y)
    self.classes_ = labels
    self.n_classes_ = len(labels)

    # multitask response: one indicator column per class
    indicators = mo.one_hot_encode2(output_y, self.n_classes_)

    for class_idx in range(self.n_classes_):
        fitted = self.obj.fit(scaled_Z, indicators[:, class_idx], **kwargs)
        # self.obj is refitted at each iteration: keep an independent copy
        self.fit_objs_[class_idx] = deepcopy(fitted)

    return self
Fit MultitaskClassifier to training data (X, y).
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
def predict(self, X, **kwargs):
    """Predict test data X.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Test vectors, where n_samples is the number
            of samples and n_features is the number of features.

        **kwargs: additional parameters, forwarded to self.predict_proba

    Returns:

        model predictions: {array-like}
            for each row of the probabilities, the position of the
            largest one

    """

    class_probs = self.predict_proba(X, **kwargs)
    return np.argmax(class_probs, axis=1)
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
def predict_proba(self, X, **kwargs):
    """Predict probabilities for test data X.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Test vectors, where n_samples is the number
            of samples and n_features is the number of features.
            A 1-D array is treated as a single observation.

        **kwargs: additional parameters to be passed to
                self.cook_test_set

    Returns:

        probability estimates for test data: {array-like},
        one row per observation and one column per class

    """

    shape_X = X.shape

    if len(shape_X) == 1:
        # Single observation: exactly one row of probabilities
        # (shape_X[0] is the number of features here, not of samples)
        n_features = shape_X[0]

        new_X = mo.rbind(
            X.reshape(1, n_features),
            np.ones(n_features).reshape(1, n_features),
        )

        Z = self.cook_test_set(new_X, **kwargs)

        probs = np.zeros((1, self.n_classes_))

        # loop on all the classes; only the first row of Z is the
        # actual observation
        for i in range(self.n_classes_):
            probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)[0]

    else:
        Z = self.cook_test_set(X, **kwargs)

        probs = np.zeros((shape_X[0], self.n_classes_))

        # loop on all the classes
        for i in range(self.n_classes_):
            probs[:, i] = self.fit_objs_[i].predict(Z, **kwargs)

    # squash raw regression outputs to (0, 1), then normalize each row
    expit_raw_probs = expit(probs)

    return expit_raw_probs / expit_raw_probs.sum(axis=1)[:, None]
Predict probabilities for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
probability estimates for test data: {array-like}
class NeuralNetRegressor(BaseEstimator, RegressorMixin):
    """
    (Pretrained) Neural Network Regressor.

    Parameters:

        hidden_layer_sizes : tuple, default=(100,)
            The number of neurons in each hidden layer.
        max_iter : int, default=100
            The maximum number of iterations to train the model.
        learning_rate : float, default=0.01
            The learning rate for the optimizer.
        l1_ratio : float, default=0.5
            The ratio of L1 regularization.
        alpha : float, default=1e-6
            The regularization parameter.
        activation_name : str, default="relu"
            The activation function to use.
        dropout : float, default=0.0
            The dropout rate.
        random_state : int, default=None
            The random state for the random number generator.
        weights : list, default=None
            The weights to initialize the model with.

    Attributes:

        weights : list
            The weights of the model.
        params : list
            The parameters of the model.
        scaler_ : sklearn.preprocessing.StandardScaler
            The scaler used to standardize the input features.
        y_mean_ : float
            The mean of the target variable.

    Methods:

        fit(X, y)
            Fit the model to the data.
        predict(X)
            Predict the target variable.
        get_weights()
            Get the weights of the model.
        set_weights(weights)
            Set the weights of the model.
    """

    def __init__(
        self,
        hidden_layer_sizes=None,
        max_iter=100,
        learning_rate=0.01,
        l1_ratio=0.5,
        alpha=1e-6,
        activation_name="relu",
        dropout=0,
        weights=None,
        random_state=None,
    ):
        if weights is None and hidden_layer_sizes is None:
            hidden_layer_sizes = (100,)  # default value if neither is provided
        self.hidden_layer_sizes = hidden_layer_sizes
        self.max_iter = max_iter
        self.learning_rate = learning_rate
        self.l1_ratio = l1_ratio
        self.alpha = alpha
        self.activation_name = activation_name
        self.dropout = dropout
        self.weights = weights
        self.random_state = random_state
        self.params = None
        self.scaler_ = StandardScaler()
        self.y_mean_ = None

    def _validate_weights(self, input_dim):
        """Validate that weights dimensions are coherent.

        Returns False when no weights are set, True when every
        (weight, bias) pair chains correctly from `input_dim` down to a
        single output; raises ValueError otherwise.
        """
        if not self.weights:
            return False

        try:
            # Check each layer's weights and biases
            prev_dim = input_dim
            for W, b in self.weights:
                # Check weight matrix dimensions
                if W.shape[0] != prev_dim:
                    raise ValueError(
                        f"Weight matrix input dimension {W.shape[0]} does not match, previous layer output dimension {prev_dim}"
                    )
                # Check bias dimension matches weight matrix output
                if W.shape[1] != b.shape[0]:
                    raise ValueError(
                        f"Bias dimension {b.shape[0]} does not match weight matrix, output dimension {W.shape[1]}"
                    )
                prev_dim = W.shape[1]

            # Check final output dimension is 1 for regression
            if prev_dim != 1:
                raise ValueError(
                    f"Final layer output dimension {prev_dim} must be 1 for regression"
                )

            return True
        except (AttributeError, IndexError) as err:
            raise ValueError(
                "Weights format is invalid. Expected list of (weight, bias) tuples"
            ) from err

    def fit(self, X, y):
        """Fit the model to the data.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors; standardized with `scaler_` before use.
            y: array with a `reshape` method, shape = [n_samples]
                Target values; centered with their mean before training.

        Returns:

            self: object
        """
        # Standardize the input features
        X = self.scaler_.fit_transform(X)
        # Ensure y is 2D for consistency
        y = y.reshape(-1, 1)
        self.y_mean_ = jnp.mean(y)
        y = y - self.y_mean_
        # Use the supplied weights when they are present and valid; otherwise
        # (no weights, or an empty list) initialize from hidden_layer_sizes
        if self.weights is not None and self._validate_weights(X.shape[1]):
            self.params = self.weights
        else:
            if self.hidden_layer_sizes is None:
                raise ValueError(
                    "Either weights or hidden_layer_sizes must be provided"
                )
            self.params = initialize_params(
                X.shape[1], self.hidden_layer_sizes, self.random_state
            )
        loss_fn = partial(loss, l1_ratio=self.l1_ratio, alpha=self.alpha)
        grad_loss = jit(grad(loss_fn))  # compiled gradient evaluation function
        perex_grads = jit(
            vmap(grad_loss, in_axes=(None, 0, 0))
        )  # fast per-example grads
        # Training loop
        for _ in range(self.max_iter):
            grads = perex_grads(self.params, X, y)
            # Average gradients across examples
            # (jax.tree_util.tree_map: the top-level jax.tree_map alias is
            # deprecated and absent from recent JAX releases)
            grads = jax.tree_util.tree_map(lambda g: jnp.mean(g, axis=0), grads)
            # Update parameters
            self.params = [
                (W - self.learning_rate * dW, b - self.learning_rate * db)
                for (W, b), (dW, db) in zip(self.params, grads)
            ]
        # Store final weights
        self.weights = self.params
        return self

    def get_weights(self):
        """Return the current weights of the model."""
        if self.weights is None:
            raise ValueError("No weights available. Model has not been fitted yet.")
        return self.weights

    def set_weights(self, weights):
        """Set the weights of the model manually."""
        self.weights = weights
        self.params = weights

    def predict(self, X):
        """Predict the target variable.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Input vectors; standardized with the scaler fitted in `fit`.

        Returns:

            1-D array of predictions, shifted back by the training mean
            of the target.

        Raises:

            ValueError: if the model has no parameters yet.
        """
        # Check first: otherwise the unfitted scaler fails before this
        # explicit message can ever be reached
        if self.params is None:
            raise ValueError("Model has not been fitted yet.")
        X = self.scaler_.transform(X)
        predictions = predict_internal(
            self.params,
            X,
            activation_func=self.activation_name,
            dropout=self.dropout,
            seed=self.random_state,
        )
        return predictions.reshape(-1) + self.y_mean_
(Pretrained) Neural Network Regressor.
Parameters:
hidden_layer_sizes : tuple, default=(100,)
The number of neurons in each hidden layer.
max_iter : int, default=100
The maximum number of iterations to train the model.
learning_rate : float, default=0.01
The learning rate for the optimizer.
l1_ratio : float, default=0.5
The ratio of L1 regularization.
alpha : float, default=1e-6
The regularization parameter.
activation_name : str, default="relu"
The activation function to use.
dropout : float, default=0.0
The dropout rate.
random_state : int, default=None
The random state for the random number generator.
weights : list, default=None
The weights to initialize the model with.
Attributes:
weights : list
The weights of the model.
params : list
The parameters of the model.
scaler_ : sklearn.preprocessing.StandardScaler
The scaler used to standardize the input features.
y_mean_ : float
The mean of the target variable.
Methods:
fit(X, y)
Fit the model to the data.
predict(X)
Predict the target variable.
get_weights()
Get the weights of the model.
set_weights(weights)
Set the weights of the model.
def fit(self, X, y):
    """Fit the model to the data.

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors; standardized with `scaler_` before use.
        y: array with a `reshape` method, shape = [n_samples]
            Target values; centered with their mean before training.

    Returns:

        self: object

    Raises:

        ValueError: if no usable weights are available and
            `hidden_layer_sizes` is None.
    """
    # Standardize the input features
    X = self.scaler_.fit_transform(X)
    # Ensure y is 2D for consistency
    y = y.reshape(-1, 1)
    self.y_mean_ = jnp.mean(y)
    y = y - self.y_mean_
    # Use the supplied weights when they are present and valid; otherwise
    # (no weights, or an empty list) initialize from hidden_layer_sizes
    if self.weights is not None and self._validate_weights(X.shape[1]):
        self.params = self.weights
    else:
        if self.hidden_layer_sizes is None:
            raise ValueError(
                "Either weights or hidden_layer_sizes must be provided"
            )
        self.params = initialize_params(
            X.shape[1], self.hidden_layer_sizes, self.random_state
        )
    loss_fn = partial(loss, l1_ratio=self.l1_ratio, alpha=self.alpha)
    grad_loss = jit(grad(loss_fn))  # compiled gradient evaluation function
    perex_grads = jit(
        vmap(grad_loss, in_axes=(None, 0, 0))
    )  # fast per-example grads
    # Training loop
    for _ in range(self.max_iter):
        grads = perex_grads(self.params, X, y)
        # Average gradients across examples
        # (jax.tree_util.tree_map: the top-level jax.tree_map alias is
        # deprecated and absent from recent JAX releases)
        grads = jax.tree_util.tree_map(lambda g: jnp.mean(g, axis=0), grads)
        # Update parameters
        self.params = [
            (W - self.learning_rate * dW, b - self.learning_rate * db)
            for (W, b), (dW, db) in zip(self.params, grads)
        ]
    # Store final weights
    self.weights = self.params
    return self
def predict(self, X):
    """Predict the target variable.

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Input vectors; standardized with the scaler fitted in `fit`.

    Returns:

        1-D array of predictions, shifted back by the training mean
        of the target.

    Raises:

        ValueError: if the model has no parameters yet.
    """
    # Check first: otherwise the unfitted scaler fails before this
    # explicit message can ever be reached
    if self.params is None:
        raise ValueError("Model has not been fitted yet.")
    X = self.scaler_.transform(X)
    predictions = predict_internal(
        self.params,
        X,
        activation_func=self.activation_name,
        dropout=self.dropout,
        seed=self.random_state,
    )
    return predictions.reshape(-1) + self.y_mean_
class NeuralNetClassifier(BaseEstimator, ClassifierMixin):
    """
    (Pretrained) Neural Network Classifier.

    Wraps a `NeuralNetRegressor` inside a `SimpleMultitaskClassifier`;
    all predictions are delegated to that wrapped model.

    Parameters:

        hidden_layer_sizes : tuple, default=(100,)
            The number of neurons in each hidden layer.
        max_iter : int, default=100
            The maximum number of iterations to train the model.
        learning_rate : float, default=0.01
            The learning rate for the optimizer.
        weights : list, default=None
            The weights to initialize the model with.
        l1_ratio : float, default=0.5
            The ratio of L1 regularization.
        alpha : float, default=1e-6
            The regularization parameter.
        activation_name : str, default="relu"
            The activation function to use.
        dropout : float, default=0.0
            The dropout rate.
        random_state : int, default=None
            The random state for the random number generator.

    Attributes:

        regr : SimpleMultitaskClassifier or None
            The wrapped model; `None` until `fit` is called.
        classes_ : array
            Unique values of the training target (set by `fit`).
        n_classes_ : int
            Number of unique values of the training target (set by `fit`).

    Methods:

        fit(X, y)
            Fit the model to the data.
        predict(X)
            Predict the target variable.
        predict_proba(X)
            Predict the probability of the target variable.
    """

    def __init__(
        self,
        hidden_layer_sizes=(100,),
        max_iter=100,
        learning_rate=0.01,
        weights=None,
        l1_ratio=0.5,
        alpha=1e-6,
        activation_name="relu",
        dropout=0.0,
        random_state=None,
    ):
        # network architecture and initial state
        self.hidden_layer_sizes = hidden_layer_sizes
        self.weights = weights
        self.activation_name = activation_name
        self.dropout = dropout
        # optimization settings
        self.max_iter = max_iter
        self.learning_rate = learning_rate
        self.l1_ratio = l1_ratio
        self.alpha = alpha
        self.random_state = random_state
        # wrapped model, created in fit
        self.regr = None

    def fit(self, X, y):
        """Fit the model to the data.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number of samples and
                n_features is the number of features.
            y: array-like, shape = [n_samples]
                Target values.

        Returns:

            self: object
        """
        base_learner = NeuralNetRegressor(
            hidden_layer_sizes=self.hidden_layer_sizes,
            max_iter=self.max_iter,
            learning_rate=self.learning_rate,
            weights=self.weights,
            l1_ratio=self.l1_ratio,
            alpha=self.alpha,
            activation_name=self.activation_name,
            dropout=self.dropout,
            random_state=self.random_state,
        )
        self.regr = SimpleMultitaskClassifier(base_learner)
        self.regr.fit(X, y)

        # bookkeeping attributes describing the training data
        self.classes_ = np.unique(y)
        self.n_classes_ = len(self.classes_)
        self.n_tasks_ = 1
        self.n_features_in_ = X.shape[1]
        self.n_outputs_ = 1
        self.n_samples_fit_ = X.shape[0]
        self.n_samples_test_ = X.shape[0]
        self.n_features_out_ = 1
        return self

    def predict_proba(self, X):
        """Predict the probability of the target variable.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Input vectors, where n_samples is the number of samples and
                n_features is the number of features.
        """
        wrapped = self.regr
        return wrapped.predict_proba(X)

    def predict(self, X):
        """Predict the target variable.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Input vectors, where n_samples is the number of samples and
                n_features is the number of features.
        """
        wrapped = self.regr
        return wrapped.predict(X)
(Pretrained) Neural Network Classifier.
Parameters:
hidden_layer_sizes : tuple, default=(100,)
The number of neurons in each hidden layer.
max_iter : int, default=100
The maximum number of iterations to train the model.
learning_rate : float, default=0.01
The learning rate for the optimizer.
l1_ratio : float, default=0.5
The ratio of L1 regularization.
alpha : float, default=1e-6
The regularization parameter.
activation_name : str, default="relu"
The activation function to use.
dropout : float, default=0.0
The dropout rate.
random_state : int, default=None
The random state for the random number generator.
weights : list, default=None
The weights to initialize the model with.
Attributes:
weights : list
The weights of the model.
params : list
The parameters of the model.
scaler_ : sklearn.preprocessing.StandardScaler
The scaler used to standardize the input features.
y_mean_ : float
The mean of the target variable.
Methods:
fit(X, y)
Fit the model to the data.
predict(X)
Predict the target variable.
predict_proba(X)
Predict the probability of the target variable.
get_weights()
Get the weights of the model.
set_weights(weights)
Set the weights of the model.
def fit(self, X, y):
    """Fit the model to the data.

    A `NeuralNetRegressor` configured from this object's settings is
    wrapped in a `SimpleMultitaskClassifier`, which is then fitted.

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number of samples and
            n_features is the number of features.
        y: array-like, shape = [n_samples]
            Target values.

    Returns:

        self: object
    """
    base_learner = NeuralNetRegressor(
        hidden_layer_sizes=self.hidden_layer_sizes,
        max_iter=self.max_iter,
        learning_rate=self.learning_rate,
        weights=self.weights,
        l1_ratio=self.l1_ratio,
        alpha=self.alpha,
        activation_name=self.activation_name,
        dropout=self.dropout,
        random_state=self.random_state,
    )
    self.regr = SimpleMultitaskClassifier(base_learner)
    self.regr.fit(X, y)

    # bookkeeping attributes describing the training data
    self.classes_ = np.unique(y)
    self.n_classes_ = len(self.classes_)
    self.n_tasks_ = 1
    self.n_features_in_ = X.shape[1]
    self.n_outputs_ = 1
    self.n_samples_fit_ = X.shape[0]
    self.n_samples_test_ = X.shape[0]
    self.n_features_out_ = 1
    return self
Fit the model to the data.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number of samples and
n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
def predict_proba(self, X):
    """Predict the probability of the target variable.

    The call is delegated to the wrapped model stored in `self.regr`.

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Input vectors, where n_samples is the number of samples and
            n_features is the number of features.
    """
    wrapped = self.regr
    return wrapped.predict_proba(X)
Predict the probability of the target variable.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number of samples and
n_features is the number of features.
def predict(self, X):
    """Predict the target variable.

    Delegates to the fitted wrapper stored in `self.regr`.

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Input vectors, where n_samples is the number of samples and
            n_features is the number of features.

    Returns:

        Whatever `self.regr.predict(X)` returns.
    """
    fitted_wrapper = self.regr
    predictions = fitted_wrapper.predict(X)
    return predictions
Predict the target variable.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number of samples and
n_features is the number of features.
class PredictionInterval(BaseEstimator, RegressorMixin):
    """Class PredictionInterval: Obtain prediction intervals.

    Attributes:

        obj: an object;
            fitted object containing methods `fit` and `predict`

        method: a string;
            method for constructing the prediction intervals.
            Currently "splitconformal" (default) and "localconformal"

        level: a float;
            Confidence level for prediction intervals. Default is 95,
            equivalent to a miscoverage error of 5 (%)

        replications: an integer;
            Number of replications for simulated conformal (default is `None`),
            for type_pi = "bootstrap" or "kde"

        type_pi: a string;
            type of prediction interval: currently `None`
            (split conformal without simulation), "kde" or "bootstrap"

        type_split: a string;
            "random" (random split of data) or "sequential" (sequential split of data)

        kernel: stored as given; not read by `fit` or `predict`

        agg: a string; stored as given; not read by `fit` or `predict`

        seed: an integer;
            Reproducibility of fit (there's a random split between fitting and calibration data)
    """

    def __init__(
        self,
        obj,
        method="splitconformal",
        level=95,
        type_pi=None,
        type_split="random",
        replications=None,
        kernel=None,
        agg="mean",
        seed=123,
    ):

        self.obj = obj
        self.method = method
        self.level = level
        self.type_pi = type_pi
        self.type_split = type_split
        self.replications = replications
        self.kernel = kernel
        self.agg = agg
        self.seed = seed
        # miscoverage rate as a fraction (0.05 for level=95)
        self.alpha_ = 1 - self.level / 100
        self.quantile_ = None
        self.icp_ = None
        self.calibrated_residuals_ = None
        self.scaled_calibrated_residuals_ = None
        self.calibrated_residuals_scaler_ = None
        self.kde_ = None

    def fit(self, X, y, sample_weight=None, **kwargs):
        """Fit the `method` to training data (X, y).

        The data are split in two halves: the first is used to fit `obj`,
        the second to calibrate the intervals.

        Args:

            X: array-like, shape = [n_samples, n_features];
                Training set vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples, ]; Target values.

            sample_weight: array-like, shape = [n_samples]
                Sample weights. Accepted for API compatibility; not used.

            **kwargs: accepted for API compatibility; not used.

        Returns:

            self: object

        Raises:

            ValueError: if `method` or `type_split` is not a supported value.
        """

        if self.method not in ("splitconformal", "localconformal"):
            raise ValueError(
                "`method` must be in ('splitconformal', 'localconformal')"
            )

        if self.type_split == "random":

            X_train, X_calibration, y_train, y_calibration = train_test_split(
                X, y, test_size=0.5, random_state=self.seed
            )

        elif self.type_split == "sequential":

            n_x = X.shape[0]
            n_x_half = n_x // 2
            first_half_idx = range(0, n_x_half)
            second_half_idx = range(n_x_half, n_x)
            X_train = X[first_half_idx, :]
            X_calibration = X[second_half_idx, :]
            y_train = y[first_half_idx]
            y_calibration = y[second_half_idx]

        else:
            # previously this fell through and failed later with an
            # UnboundLocalError on X_train
            raise ValueError("`type_split` must be in ('random', 'sequential')")

        if self.method == "splitconformal":

            self.obj.fit(X_train, y_train)
            preds_calibration = self.obj.predict(X_calibration)
            self.calibrated_residuals_ = y_calibration - preds_calibration
            absolute_residuals = np.abs(self.calibrated_residuals_)
            self.calibrated_residuals_scaler_ = StandardScaler(
                with_mean=True, with_std=True
            )
            self.scaled_calibrated_residuals_ = (
                self.calibrated_residuals_scaler_.fit_transform(
                    self.calibrated_residuals_.reshape(-1, 1)
                ).ravel()
            )
            try:
                # numpy version >= 1.22
                self.quantile_ = np.quantile(
                    a=absolute_residuals, q=self.level / 100, method="higher"
                )
            except TypeError:
                # numpy version < 1.22 does not know the `method` keyword
                self.quantile_ = np.quantile(
                    a=absolute_residuals,
                    q=self.level / 100,
                    interpolation="higher",
                )

        if self.method == "localconformal":

            mad_estimator = ExtraTreesRegressor()
            normalizer = RegressorNormalizer(self.obj, mad_estimator, AbsErrorErrFunc())
            nc = RegressorNc(self.obj, AbsErrorErrFunc(), normalizer)
            self.icp_ = IcpRegressor(nc)
            self.icp_.fit(X_train, y_train)
            self.icp_.calibrate(X_calibration, y_calibration)

        return self

    def predict(self, X, return_pi=False):
        """Obtain predictions and prediction intervals

        Args:

            X: array-like, shape = [n_samples, n_features];
                Testing set vectors, where n_samples is the number
                of samples and n_features is the number of features.

            return_pi: boolean
                Whether the prediction interval is returned or not.
                Default is False, for compatibility with other _estimators_.
                If True, a tuple containing the predictions + lower and upper
                bounds is returned.

        Returns:

            - without simulation and `return_pi=False`: the point predictions;
            - without simulation and `return_pi=True`: a namedtuple
              (mean, lower, upper);
            - "splitconformal" with `replications` and/or `type_pi` set:
              always a namedtuple (mean, sims, lower, upper), whatever the
              value of `return_pi`.

        Raises:

            NotImplementedError: for "localconformal" with `replications` set.
        """
        import warnings  # local import: nothing else in this class needs it

        if self.method == "splitconformal":

            pred = self.obj.predict(X)

            if self.replications is None and self.type_pi is None:
                # plain split conformal: symmetric band of half-width quantile_
                if not return_pi:
                    return pred

                DescribeResult = namedtuple(
                    "DescribeResult", ("mean", "lower", "upper")
                )
                return DescribeResult(
                    pred, pred - self.quantile_, pred + self.quantile_
                )

            # Simulation-based intervals. Missing settings are defaulted and
            # reported with a real warning; the previous `raise Warning(...)`
            # aborted the call right after setting the default.
            if self.type_pi is None:
                self.type_pi = "kde"
                warnings.warn("type_pi must be set, setting to 'kde'")

            if self.replications is None:
                self.replications = 100
                warnings.warn("replications must be set, setting to 100")

            assert self.type_pi in (
                "bootstrap",
                "kde",
            ), "`self.type_pi` must be in ('bootstrap', 'kde')"

            residual_scale = self.calibrated_residuals_scaler_.scale_[0]

            if self.type_pi == "bootstrap":
                np.random.seed(self.seed)
                self.residuals_sims_ = np.asarray(
                    [
                        np.random.choice(
                            a=self.scaled_calibrated_residuals_,
                            size=X.shape[0],
                        )
                        for _ in range(self.replications)
                    ]
                ).T
                self.sims_ = np.asarray(
                    [
                        pred + residual_scale * self.residuals_sims_[:, i].ravel()
                        for i in range(self.replications)
                    ]
                ).T
            else:  # "kde"
                self.kde_ = gaussian_kde(dataset=self.scaled_calibrated_residuals_)
                self.sims_ = np.asarray(
                    [
                        pred
                        + residual_scale
                        * self.kde_.resample(
                            size=X.shape[0], seed=self.seed + i
                        ).ravel()
                        for i in range(self.replications)
                    ]
                ).T

            self.mean_ = np.mean(self.sims_, axis=1)
            # alpha_ is already a fraction (1 - level / 100), so the two-sided
            # bounds sit at alpha_ / 2 and 1 - alpha_ / 2 (was alpha_ / 200).
            self.lower_ = np.quantile(self.sims_, q=self.alpha_ / 2, axis=1)
            self.upper_ = np.quantile(self.sims_, q=1 - self.alpha_ / 2, axis=1)

            DescribeResult = namedtuple(
                "DescribeResult", ("mean", "sims", "lower", "upper")
            )

            return DescribeResult(self.mean_, self.sims_, self.lower_, self.upper_)

        if self.method == "localconformal":

            pred = self.icp_.predict(X)

            if self.replications is not None:
                raise NotImplementedError(
                    "When self.method == 'localconformal', there are no simulations"
                )

            if not return_pi:
                return pred

            # significance is a fraction; `level` is a percentage
            # (was `1 - self.level`, i.e. -94 for level=95)
            predictions_bounds = self.icp_.predict(
                X, significance=1 - self.level / 100
            )
            DescribeResult = namedtuple(
                "DescribeResult", ("mean", "lower", "upper")
            )
            return DescribeResult(
                pred, predictions_bounds[:, 0], predictions_bounds[:, 1]
            )
Class PredictionInterval: Obtain prediction intervals.
Attributes:
obj: an object;
fitted object containing methods `fit` and `predict`
method: a string;
method for constructing the prediction intervals.
Currently "splitconformal" (default) and "localconformal"
level: a float;
Confidence level for prediction intervals. Default is 95,
equivalent to a miscoverage error of 5 (%)
replications: an integer;
Number of replications for simulated conformal (default is `None`),
for type_pi = "bootstrap" or "kde"
type_pi: a string;
type of prediction interval: currently `None`
(split conformal without simulation), "kde" or "bootstrap"
type_split: a string;
"random" (random split of data) or "sequential" (sequential split of data)
seed: an integer;
Reproducibility of fit (there's a random split between fitting and calibration data)
def fit(self, X, y, sample_weight=None, **kwargs):
    """Fit the `method` to training data (X, y).

    The data are split in two halves: the first is used to fit `obj`,
    the second to calibrate the intervals.

    Args:

        X: array-like, shape = [n_samples, n_features];
            Training set vectors, where n_samples is the number
            of samples and n_features is the number of features.

        y: array-like, shape = [n_samples, ]; Target values.

        sample_weight: array-like, shape = [n_samples]
            Sample weights. Accepted for API compatibility; not used.

        **kwargs: accepted for API compatibility; not used.

    Returns:

        self: object

    Raises:

        ValueError: if `method` or `type_split` is not a supported value.
    """

    if self.method not in ("splitconformal", "localconformal"):
        raise ValueError(
            "`method` must be in ('splitconformal', 'localconformal')"
        )

    if self.type_split == "random":

        X_train, X_calibration, y_train, y_calibration = train_test_split(
            X, y, test_size=0.5, random_state=self.seed
        )

    elif self.type_split == "sequential":

        n_x = X.shape[0]
        n_x_half = n_x // 2
        first_half_idx = range(0, n_x_half)
        second_half_idx = range(n_x_half, n_x)
        X_train = X[first_half_idx, :]
        X_calibration = X[second_half_idx, :]
        y_train = y[first_half_idx]
        y_calibration = y[second_half_idx]

    else:
        # previously this fell through and failed later with an
        # UnboundLocalError on X_train
        raise ValueError("`type_split` must be in ('random', 'sequential')")

    if self.method == "splitconformal":

        self.obj.fit(X_train, y_train)
        preds_calibration = self.obj.predict(X_calibration)
        self.calibrated_residuals_ = y_calibration - preds_calibration
        absolute_residuals = np.abs(self.calibrated_residuals_)
        self.calibrated_residuals_scaler_ = StandardScaler(
            with_mean=True, with_std=True
        )
        self.scaled_calibrated_residuals_ = (
            self.calibrated_residuals_scaler_.fit_transform(
                self.calibrated_residuals_.reshape(-1, 1)
            ).ravel()
        )
        try:
            # numpy version >= 1.22
            self.quantile_ = np.quantile(
                a=absolute_residuals, q=self.level / 100, method="higher"
            )
        except TypeError:
            # numpy version < 1.22 does not know the `method` keyword
            self.quantile_ = np.quantile(
                a=absolute_residuals,
                q=self.level / 100,
                interpolation="higher",
            )

    if self.method == "localconformal":

        mad_estimator = ExtraTreesRegressor()
        normalizer = RegressorNormalizer(self.obj, mad_estimator, AbsErrorErrFunc())
        nc = RegressorNc(self.obj, AbsErrorErrFunc(), normalizer)
        self.icp_ = IcpRegressor(nc)
        self.icp_.fit(X_train, y_train)
        self.icp_.calibrate(X_calibration, y_calibration)

    return self
Fit the method to training data (X, y).
Args:
X: array-like, shape = [n_samples, n_features];
Training set vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples, ]; Target values.
sample_weight: array-like, shape = [n_samples]
Sample weights.
def predict(self, X, return_pi=False):
    """Obtain predictions and prediction intervals

    Args:

        X: array-like, shape = [n_samples, n_features];
            Testing set vectors, where n_samples is the number
            of samples and n_features is the number of features.

        return_pi: boolean
            Whether the prediction interval is returned or not.
            Default is False, for compatibility with other _estimators_.
            If True, a tuple containing the predictions + lower and upper
            bounds is returned.

    Returns:

        - without simulation and `return_pi=False`: the point predictions;
        - without simulation and `return_pi=True`: a namedtuple
          (mean, lower, upper);
        - "splitconformal" with `replications` and/or `type_pi` set:
          always a namedtuple (mean, sims, lower, upper), whatever the
          value of `return_pi`.

    Raises:

        NotImplementedError: for "localconformal" with `replications` set.
    """
    import warnings  # local import: nothing else here needs it

    if self.method == "splitconformal":

        pred = self.obj.predict(X)

        if self.replications is None and self.type_pi is None:
            # plain split conformal: symmetric band of half-width quantile_
            if not return_pi:
                return pred

            DescribeResult = namedtuple(
                "DescribeResult", ("mean", "lower", "upper")
            )
            return DescribeResult(
                pred, pred - self.quantile_, pred + self.quantile_
            )

        # Simulation-based intervals. Missing settings are defaulted and
        # reported with a real warning; the previous `raise Warning(...)`
        # aborted the call right after setting the default.
        if self.type_pi is None:
            self.type_pi = "kde"
            warnings.warn("type_pi must be set, setting to 'kde'")

        if self.replications is None:
            self.replications = 100
            warnings.warn("replications must be set, setting to 100")

        assert self.type_pi in (
            "bootstrap",
            "kde",
        ), "`self.type_pi` must be in ('bootstrap', 'kde')"

        residual_scale = self.calibrated_residuals_scaler_.scale_[0]

        if self.type_pi == "bootstrap":
            np.random.seed(self.seed)
            self.residuals_sims_ = np.asarray(
                [
                    np.random.choice(
                        a=self.scaled_calibrated_residuals_,
                        size=X.shape[0],
                    )
                    for _ in range(self.replications)
                ]
            ).T
            self.sims_ = np.asarray(
                [
                    pred + residual_scale * self.residuals_sims_[:, i].ravel()
                    for i in range(self.replications)
                ]
            ).T
        else:  # "kde"
            self.kde_ = gaussian_kde(dataset=self.scaled_calibrated_residuals_)
            self.sims_ = np.asarray(
                [
                    pred
                    + residual_scale
                    * self.kde_.resample(
                        size=X.shape[0], seed=self.seed + i
                    ).ravel()
                    for i in range(self.replications)
                ]
            ).T

        self.mean_ = np.mean(self.sims_, axis=1)
        # alpha_ is a fraction (the constructor sets it to 1 - level / 100),
        # so the two-sided bounds sit at alpha_ / 2 and 1 - alpha_ / 2
        # (was alpha_ / 200, which gave almost-full-range bounds).
        self.lower_ = np.quantile(self.sims_, q=self.alpha_ / 2, axis=1)
        self.upper_ = np.quantile(self.sims_, q=1 - self.alpha_ / 2, axis=1)

        DescribeResult = namedtuple(
            "DescribeResult", ("mean", "sims", "lower", "upper")
        )

        return DescribeResult(self.mean_, self.sims_, self.lower_, self.upper_)

    if self.method == "localconformal":

        pred = self.icp_.predict(X)

        if self.replications is not None:
            raise NotImplementedError(
                "When self.method == 'localconformal', there are no simulations"
            )

        if not return_pi:
            return pred

        # significance is a fraction; `level` is a percentage
        # (was `1 - self.level`, i.e. -94 for level=95)
        predictions_bounds = self.icp_.predict(
            X, significance=1 - self.level / 100
        )
        DescribeResult = namedtuple(
            "DescribeResult", ("mean", "lower", "upper")
        )
        return DescribeResult(
            pred, predictions_bounds[:, 0], predictions_bounds[:, 1]
        )
Obtain predictions and prediction intervals
Args:
X: array-like, shape = [n_samples, n_features];
Testing set vectors, where n_samples is the number
of samples and n_features is the number of features.
return_pi: boolean
Whether the prediction interval is returned or not.
Default is False, for compatibility with other _estimators_.
If True, a tuple containing the predictions + lower and upper
bounds is returned.
class SimpleMultitaskClassifier(Base, ClassifierMixin):
    """Multitask Classification model based on regression models, with shared covariates

    Parameters:

        obj: object
            any object (must be a regression model) containing a method fit (obj.fit())
            and a method predict (obj.predict())

    Attributes:

        fit_objs_: dict
            objects fitted to each individual class (one regression model
            per class, keyed by class index)

        n_classes_: int
            number of classes for the classifier

    Examples:

    ```python
    import nnetsauce as ns
    import numpy as np
    from sklearn.datasets import load_breast_cancer
    from sklearn.linear_model import LinearRegression
    from sklearn.model_selection import train_test_split
    from sklearn import metrics
    from time import time

    breast_cancer = load_breast_cancer()
    Z = breast_cancer.data
    t = breast_cancer.target

    X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2,
                                                        random_state=123+2*10)

    # Linear Regression is used
    regr = LinearRegression()
    fit_obj = ns.SimpleMultitaskClassifier(regr)

    start = time()
    fit_obj.fit(X_train, y_train)
    print(f"Elapsed {time() - start}")

    print(fit_obj.score(X_test, y_test))
    print(fit_obj.score(X_test, y_test, scoring="roc_auc"))

    start = time()
    preds = fit_obj.predict(X_test)
    print(f"Elapsed {time() - start}")
    print(metrics.classification_report(preds, y_test))
    ```

    """

    # construct the object -----

    def __init__(
        self,
        obj,
    ):
        self.type_fit = "classification"
        self.obj = obj
        self.fit_objs_ = {}
        self.X_scaler_ = StandardScaler()
        self.scaled_X_ = None

    def fit(self, X, y, sample_weight=None, **kwargs):
        """Fit SimpleMultitaskClassifier to training data (X, y).

        The response is one-hot encoded and one deep copy of `obj` is
        fitted per class on the standardized covariates.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            sample_weight: accepted for API compatibility; not used.

            **kwargs: additional parameters to be passed to self.obj.fit

        Returns:

            self: object

        """

        assert mx.is_factor(y), "y must contain only integers"

        self.classes_ = np.unique(y)  # for compatibility with sklearn
        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn

        self.scaled_X_ = self.X_scaler_.fit_transform(X)

        # multitask response
        Y = mo.one_hot_encode2(y, self.n_classes_)

        # start from a clean slate so a refit with fewer classes does not
        # keep estimators from a previous fit
        self.fit_objs_ = {}
        for i in range(self.n_classes_):
            self.fit_objs_[i] = deepcopy(
                self.obj.fit(self.scaled_X_, Y[:, i], **kwargs)
            )
        return self

    def predict(self, X, **kwargs):
        """Predict test data X.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Test vectors, where n_samples is the number
                of samples and n_features is the number of features.

            **kwargs: additional parameters passed to `predict_proba`

        Returns:

            model predictions: {array-like}
                column index of the highest probability for each row. When
                `predict_proba` returns a tuple with `lower`/`upper` bounds,
                a namedtuple (mean, upper, lower[, median]) of such index
                arrays is returned instead.

        """
        # a single call; the outcome is inspected instead of relying on
        # exceptions, which used to trigger a second predict_proba call
        preds = self.predict_proba(X, **kwargs)

        if not (hasattr(preds, "upper") and hasattr(preds, "lower")):
            return np.argmax(preds, axis=1)

        if hasattr(preds, "median"):
            DescribeResult = namedtuple(
                "DescribeResult", ["mean", "upper", "lower", "median"]
            )
            return DescribeResult(
                mean=np.argmax(preds.mean, axis=1),
                upper=np.argmax(preds.upper, axis=1),
                lower=np.argmax(preds.lower, axis=1),
                median=np.argmax(preds.median, axis=1),
            )

        DescribeResult = namedtuple("DescribeResult", ["mean", "upper", "lower"])
        return DescribeResult(
            mean=np.argmax(preds.mean, axis=1),
            upper=np.argmax(preds.upper, axis=1),
            lower=np.argmax(preds.lower, axis=1),
        )

    def predict_proba(self, X, **kwargs):
        """Predict probabilities for test data X.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Test vectors, where n_samples is the number
                of samples and n_features is the number of features.
                A 1-D array is treated as a single sample.

            **kwargs: additional parameters passed to the scaler's
                `transform` and to each per-class model's `predict`

        Returns:

            probability estimates for test data: {array-like}
                - standard per-class models: array of shape
                  [n_samples, n_classes], logistic-transformed scores
                  normalized so that each row sums to 1;
                - per-class models whose predictions expose `mean`, `lower`
                  and `upper`: a namedtuple (mean, upper, lower[, median])
                  of logistic-transformed scores (not row-normalized), with
                  `median` present only if every model provides it.

        """
        single_sample = len(X.shape) == 1

        if single_sample:
            n_features = X.shape[0]
            # a row of ones is appended so the models receive a 2-row
            # matrix; only the first row's prediction is kept below
            new_X = mo.rbind(
                X.reshape(1, n_features),
                np.ones(n_features).reshape(1, n_features),
            )
            Z = self.X_scaler_.transform(new_X, **kwargs)
            n_rows = 1
        else:
            Z = self.X_scaler_.transform(X, **kwargs)
            n_rows = X.shape[0]

        # each per-class model is queried exactly once
        raw = [
            self.fit_objs_[i].predict(Z, **kwargs)
            for i in range(self.n_classes_)
        ]

        def as_column(values):
            column = np.asarray(values).ravel()
            return column[:1] if single_sample else column

        # ndarrays also have a `.mean` attribute, so `lower`/`upper` are what
        # identify a probabilistic (conformal or quantile) prediction
        probabilistic = bool(raw) and all(
            hasattr(r, "mean") and hasattr(r, "lower") and hasattr(r, "upper")
            for r in raw
        )

        if not probabilistic:
            # standard model: one score per row and class
            probs = np.zeros((n_rows, self.n_classes_))
            for i, r in enumerate(raw):
                point = r[0] if isinstance(r, tuple) else r
                probs[:, i] = as_column(point)
            expit_raw_probs = expit(probs)
            return expit_raw_probs / expit_raw_probs.sum(axis=1)[:, None]

        def stack(field):
            return np.column_stack([as_column(getattr(r, field)) for r in raw])

        probs = expit(stack("mean"))
        probs_lower = expit(stack("lower"))
        # Ensure upper >= lower
        probs_upper = np.maximum(expit(stack("upper")), probs_lower)

        if all(hasattr(r, "median") for r in raw):
            DescribeResult = namedtuple(
                "DescribeResult", ["mean", "upper", "lower", "median"]
            )
            return DescribeResult(
                mean=probs,
                upper=probs_upper,
                lower=probs_lower,
                median=expit(stack("median")),
            )

        DescribeResult = namedtuple("DescribeResult", ["mean", "upper", "lower"])
        return DescribeResult(mean=probs, upper=probs_upper, lower=probs_lower)
Multitask Classification model based on regression models, with shared covariates
Parameters:
obj: object
any object (must be a regression model) containing a method fit (obj.fit())
and a method predict (obj.predict())
seed: int
reproducibility seed
Attributes:
fit_objs_: dict
objects fitted to each individual class (one regression model per class)
n_classes_: int
number of classes for the classifier
Examples:
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time
breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target
X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2,
random_state=123+2*10)
# Linear Regression is used
regr = LinearRegression()
fit_obj = ns.SimpleMultitaskClassifier(regr)
start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")
print(fit_obj.score(X_test, y_test))
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
start = time()
preds = fit_obj.predict(X_test)
print(f"Elapsed {time() - start}")
print(metrics.classification_report(preds, y_test))
def fit(self, X, y, sample_weight=None, **kwargs):
    """Fit SimpleMultitaskClassifier to training data (X, y).

    The response is one-hot encoded and one deep copy of `obj` is
    fitted per class on the standardized covariates.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features.

        y: array-like, shape = [n_samples]
            Target values.

        sample_weight: accepted for API compatibility; not used.

        **kwargs: additional parameters to be passed to self.obj.fit

    Returns:

        self: object

    """

    assert mx.is_factor(y), "y must contain only integers"

    self.classes_ = np.unique(y)  # for compatibility with sklearn
    self.n_classes_ = len(self.classes_)  # for compatibility with sklearn

    self.scaled_X_ = self.X_scaler_.fit_transform(X)

    # multitask response
    Y = mo.one_hot_encode2(y, self.n_classes_)

    # start from a clean slate so a refit with fewer classes does not
    # keep estimators from a previous fit
    self.fit_objs_ = {}
    for i in range(self.n_classes_):
        self.fit_objs_[i] = deepcopy(
            self.obj.fit(self.scaled_X_, Y[:, i], **kwargs)
        )
    # classes_ was already set above; it is no longer recomputed here
    return self
Fit SimpleMultitaskClassifier to training data (X, y).
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
def predict(self, X, **kwargs):
    """Predict test data X.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Test vectors, where n_samples is the number
            of samples and n_features is the number of features.

        **kwargs: additional parameters passed to `predict_proba`

    Returns:

        model predictions: {array-like}
            column index of the highest probability for each row. When
            `predict_proba` returns a tuple with `lower`/`upper` bounds,
            a namedtuple (mean, upper, lower[, median]) of such index
            arrays is returned instead.

    """
    # a single call; the outcome is inspected instead of relying on
    # exceptions, which used to trigger a second predict_proba call
    preds = self.predict_proba(X, **kwargs)

    if not (hasattr(preds, "upper") and hasattr(preds, "lower")):
        return np.argmax(preds, axis=1)

    if hasattr(preds, "median"):
        DescribeResult = namedtuple(
            "DescribeResult", ["mean", "upper", "lower", "median"]
        )
        return DescribeResult(
            mean=np.argmax(preds.mean, axis=1),
            upper=np.argmax(preds.upper, axis=1),
            lower=np.argmax(preds.lower, axis=1),
            median=np.argmax(preds.median, axis=1),
        )

    DescribeResult = namedtuple("DescribeResult", ["mean", "upper", "lower"])
    return DescribeResult(
        mean=np.argmax(preds.mean, axis=1),
        upper=np.argmax(preds.upper, axis=1),
        lower=np.argmax(preds.lower, axis=1),
    )
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters
Returns:
model predictions: {array-like}
def predict_proba(self, X, **kwargs):
    """Predict probabilities for test data X.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Test vectors, where n_samples is the number
            of samples and n_features is the number of features.
            A 1-D array is treated as a single sample.

        **kwargs: additional parameters passed to the scaler's
            `transform` and to each per-class model's `predict`

    Returns:

        probability estimates for test data: {array-like}
            - standard per-class models: array of shape
              [n_samples, n_classes], logistic-transformed scores
              normalized so that each row sums to 1;
            - per-class models whose predictions expose `mean`, `lower`
              and `upper`: a namedtuple (mean, upper, lower[, median])
              of logistic-transformed scores (not row-normalized), with
              `median` present only if every model provides it.

    """
    single_sample = len(X.shape) == 1

    if single_sample:
        n_features = X.shape[0]
        # a row of ones is appended so the models receive a 2-row
        # matrix; only the first row's prediction is kept below
        new_X = mo.rbind(
            X.reshape(1, n_features),
            np.ones(n_features).reshape(1, n_features),
        )
        Z = self.X_scaler_.transform(new_X, **kwargs)
        n_rows = 1
    else:
        Z = self.X_scaler_.transform(X, **kwargs)
        n_rows = X.shape[0]

    # each per-class model is queried exactly once (the previous version
    # predicted a second time whenever it fell back to the standard path)
    raw = [
        self.fit_objs_[i].predict(Z, **kwargs)
        for i in range(self.n_classes_)
    ]

    def as_column(values):
        column = np.asarray(values).ravel()
        return column[:1] if single_sample else column

    # ndarrays also have a `.mean` attribute, so `lower`/`upper` are what
    # identify a probabilistic (conformal or quantile) prediction
    probabilistic = bool(raw) and all(
        hasattr(r, "mean") and hasattr(r, "lower") and hasattr(r, "upper")
        for r in raw
    )

    if not probabilistic:
        # Standard model: one score per row and class. The previous
        # fallback used `predict(Z)[0]`, which copied the first sample's
        # score to every row, and then still returned a tuple with
        # placeholder bounds instead of this normalized array.
        probs = np.zeros((n_rows, self.n_classes_))
        for i, r in enumerate(raw):
            point = r[0] if isinstance(r, tuple) else r
            probs[:, i] = as_column(point)
        expit_raw_probs = expit(probs)
        return expit_raw_probs / expit_raw_probs.sum(axis=1)[:, None]

    def stack(field):
        return np.column_stack([as_column(getattr(r, field)) for r in raw])

    probs = expit(stack("mean"))
    probs_lower = expit(stack("lower"))
    # Ensure upper >= lower
    probs_upper = np.maximum(expit(stack("upper")), probs_lower)

    if all(hasattr(r, "median") for r in raw):
        DescribeResult = namedtuple(
            "DescribeResult", ["mean", "upper", "lower", "median"]
        )
        return DescribeResult(
            mean=probs,
            upper=probs_upper,
            lower=probs_lower,
            median=expit(stack("median")),
        )

    DescribeResult = namedtuple("DescribeResult", ["mean", "upper", "lower"])
    return DescribeResult(mean=probs, upper=probs_upper, lower=probs_lower)
Predict probabilities for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters
Returns:
probability estimates for test data: {array-like}
class Optimizer:
    """Optimizer class

    Attributes:

        type_optim: str
            type of optimizer, (currently) either 'sgd' (stochastic minibatch gradient descent)
            or 'scd' (stochastic minibatch coordinate descent)

        num_iters: int
            number of iterations of the optimizer

        learning_rate: float
            step size

        batch_prop: float
            proportion of the initial data used at each optimization step

        learning_method: str
            "poly" - learning rate decreasing as a polynomial function
            of # of iterations
            "exp" - learning rate decreasing as an exponential function
            of # of iterations
            "momentum" - gradient descent using momentum (default)

        randomization: str
            type of randomization applied at each step
            "strat" - stratified subsampling (default)
            "shuffle" - random subsampling

        mass: float
            mass on velocity, for `learning_method` == "momentum"

        decay: float
            coefficient of decrease of the learning rate for
            `learning_method` == "poly" and `learning_method` == "exp"

        tolerance: float
            early stopping parameter (convergence of loss function)

        verbose: int
            controls verbosity of gradient descent
            0 - nothing is printed
            1 - a progress bar is printed
            2 - successive loss function values are printed

    """

    # construct the object -----

    def __init__(
        self,
        type_optim="sgd",
        num_iters=100,
        learning_rate=0.01,
        batch_prop=1.0,
        learning_method="momentum",
        randomization="strat",
        mass=0.9,
        decay=0.1,
        tolerance=1e-3,
        verbose=1,
    ):
        self.type_optim = type_optim
        self.num_iters = num_iters
        self.learning_rate = learning_rate
        self.batch_prop = batch_prop
        self.learning_method = learning_method
        self.randomization = randomization
        self.mass = mass
        self.decay = decay
        self.tolerance = tolerance
        self.verbose = verbose
        self.opt = None

    def fit(self, loss_func, response, x0, **kwargs):
        """Minimize `loss_func` starting from `x0`.

        The optimizer's output is stored in `self.results`.

        Args:

            loss_func: loss function

            response: array-like, shape = [n_samples]
                target variable (used for subsampling)

            x0: array-like, shape = [n_features]
                initial value provided to the optimizer

            **kwargs: additional parameters to be passed to
                    loss function

        Returns:

            self: object

        Raises:

            ValueError: if `type_optim` is neither 'scd' nor 'sgd'.

        """

        if self.type_optim == "scd":
            solver = scd
        elif self.type_optim == "sgd":
            solver = sgd
        else:
            # previously an unknown value returned `self` without ever
            # setting `self.results`
            raise ValueError("`type_optim` must be in ('scd', 'sgd')")

        self.results = solver(
            loss_func,
            response=response,
            x=x0,
            num_iters=self.num_iters,
            batch_prop=self.batch_prop,
            learning_rate=self.learning_rate,
            learning_method=self.learning_method,
            mass=self.mass,
            decay=self.decay,
            randomization=self.randomization,
            tolerance=self.tolerance,
            verbose=self.verbose,
            **kwargs
        )

        return self

    def one_hot_encode(self, y, n_classes):
        """Return the module-level `one_hot_encode(y, n_classes)` result."""
        return one_hot_encode(y, n_classes)
Optimizer class
Attributes:
type_optim: str
type of optimizer, (currently) either 'sgd' (stochastic minibatch gradient descent)
or 'scd' (stochastic minibatch coordinate descent)
num_iters: int
number of iterations of the optimizer
learning_rate: float
step size
batch_prop: float
proportion of the initial data used at each optimization step
learning_method: str
"poly" - learning rate decreasing as a polynomial function
of # of iterations
"exp" - learning rate decreasing as an exponential function
of # of iterations
"momentum" - gradient descent using momentum (default)
randomization: str
type of randomization applied at each step
"strat" - stratified subsampling (default)
"shuffle" - random subsampling
mass: float
mass on velocity, for `learning_method` == "momentum"
decay: float
coefficient of decrease of the learning rate for
`learning_method` == "poly" and `learning_method` == "exp"
tolerance: float
early stopping parameter (convergence of loss function)
verbose: int
controls verbosity of gradient descent
0 - nothing is printed
1 - a progress bar is printed
2 - successive loss function values are printed
def fit(self, loss_func, response, x0, **kwargs):
    """Minimize `loss_func` with the configured optimizer, starting from `x0`.

    Args:

        loss_func: loss function

        response: array-like, shape = [n_samples]
            target variable (used for subsampling)

        x0: array-like, shape = [n_features]
            initial value provided to the optimizer

        **kwargs: additional parameters to be passed to
            loss function

    Returns:

        self: object
            the optimizer's output is stored in `self.results`

    Raises:

        ValueError: if `type_optim` is neither 'sgd' nor 'scd'

    """

    # The routine is looked up lazily, inside the selected branch only.
    if self.type_optim == "scd":
        routine = scd
    elif self.type_optim == "sgd":
        routine = sgd
    else:
        # Previously an unknown value fell through silently and left
        # `self.results` undefined; fail loudly instead.
        raise ValueError(
            "type_optim must be 'sgd' or 'scd', got %r" % (self.type_optim,)
        )

    self.results = routine(
        loss_func,
        response=response,
        x=x0,
        num_iters=self.num_iters,
        batch_prop=self.batch_prop,
        learning_rate=self.learning_rate,
        learning_method=self.learning_method,
        mass=self.mass,
        decay=self.decay,
        randomization=self.randomization,
        tolerance=self.tolerance,
        verbose=self.verbose,
        **kwargs
    )

    return self
Minimize the loss function with the selected optimizer ('sgd' or 'scd'), starting from x0.
Args:
loss_func: loss function
response: array-like, shape = [n_samples]
target variable (used for subsampling)
x0: array-like, shape = [n_features]
initial value provided to the optimizer
**kwargs: additional parameters to be passed to
loss function
Returns:
self: object
class QuantileRegressor(BaseEstimator, RegressorMixin):
    """
    Quantile Regressor.

    Wraps a base regressor and derives lower / median / upper quantile
    predictions from it. One positive multiplier per quantile is found by
    minimizing the quantile (pinball) loss with `differential_evolution`;
    each quantile is built as an offset from the previous one.

    Parameters:

        obj: base model (regression model)
            The base regressor from which to build a
            quantile regressor.

        level: int, default=95
            The level of the quantiles to compute. The fitted quantiles are
            `(1 - level/100)/2`, `0.5` and `1 - (1 - level/100)/2`.

        scoring: str, default="predictions"
            The scoring to use for the optimization and constructing
            prediction intervals (predictions, residuals, conformal,
            studentized, conformal-studentized).

    Attributes:

        obj_ : base model (regression model)
            Fitted clone of `obj`.

        offset_multipliers_ : list
            The multipliers for the offset, one per quantile.

        scoring_residuals_ : array-like
            The residuals used for the scoring.

        student_multiplier_ : float
            `std(y, ddof=1) / sqrt(n)` of the targets used for the
            optimization (studentized scorings only).

    """

    def __init__(self, obj, level=95, scoring="predictions"):
        assert scoring in (
            "predictions",
            "residuals",
            "conformal",
            "studentized",
            "conformal-studentized",
        ), (
            "scoring must be one of 'predictions', 'residuals', 'conformal', "
            "'studentized', 'conformal-studentized'"
        )
        self.obj = obj
        # Stored so that BaseEstimator.get_params() / sklearn.clone() can
        # read back every constructor argument.
        self.level = level
        low_risk_level = (1 - level / 100) / 2
        self.quantiles = [low_risk_level, 0.5, 1 - low_risk_level]
        self.scoring = scoring
        self.offset_multipliers_ = None
        self.obj_ = None
        self.scoring_residuals_ = None
        self.student_multiplier_ = None

    def _compute_quantile_loss(self, residuals: np.ndarray, quantile: float) -> float:
        """
        Compute the quantile loss for a given set of residuals and quantile.
        """
        if not 0 < quantile < 1:
            raise ValueError("Quantile should be between 0 and 1.")
        loss = quantile * (residuals >= 0) + (quantile - 1) * (residuals < 0)
        return np.mean(residuals * loss)

    def _offset_scale(self, base_predictions, scoring_residuals):
        """
        Return the quantity a multiplier is applied to for `self.scoring`.
        """
        if self.scoring == "predictions":
            return np.abs(base_predictions)
        if self.scoring in ("residuals", "conformal"):
            return np.std(scoring_residuals)
        if self.scoring in ("studentized", "conformal-studentized"):
            return self.student_multiplier_
        raise ValueError("Invalid argument 'scoring'")

    def _optimize_multiplier(
        self,
        y: np.ndarray,
        base_predictions: np.ndarray,
        prev_predictions: np.ndarray = None,
        scoring_residuals: np.ndarray = None,
        quantile: float = 0.5,
    ) -> float:
        """
        Optimize the multiplier for a given quantile.

        When `prev_predictions` is None the candidate quantile is
        `base_predictions - multiplier * scale`; otherwise it is
        `prev_predictions + multiplier * scale`.
        """
        if not 0 < quantile < 1:
            raise ValueError("Quantile should be between 0 and 1.")

        if self.scoring == "predictions":
            assert base_predictions is not None, "base_predictions must be not None"
        elif self.scoring in (
            "residuals",
            "conformal",
            "studentized",
            "conformal-studentized",
        ):
            assert scoring_residuals is not None, "scoring_residuals must be not None"

        # The scale does not depend on the multiplier: compute it once
        # instead of at every objective evaluation.
        scale = self._offset_scale(base_predictions, scoring_residuals)

        def objective(log_multiplier):
            """
            Objective function for optimization.
            """
            # Convert to positive multiplier using exp
            multiplier = np.exp(log_multiplier[0])
            if prev_predictions is None:
                # For first quantile, subtract from conditional expectation
                predictions = base_predictions - multiplier * scale
            else:
                # For other quantiles, add to previous quantile
                predictions = prev_predictions + multiplier * scale
            return self._compute_quantile_loss(y - predictions, quantile)

        # Optimize in log space for numerical stability
        bounds = [(-100, 100)]  # log space bounds
        result = differential_evolution(
            objective,
            bounds,
            popsize=25,
            maxiter=200,
            tol=1e-6,
            disp=False,
        )

        return np.exp(result.x[0])

    def fit(self, X: np.ndarray, y: np.ndarray):
        """Fit the model to the data.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number of samples and
                n_features is the number of features.
            y: array-like, shape = [n_samples]
                Target values.

        Returns:

            self: object
        """
        self.obj_ = clone(self.obj)

        if self.scoring in ("conformal", "conformal-studentized"):
            X_train, X_calib, y_train, y_calib = train_test_split(
                X, y, test_size=0.5, random_state=42
            )
            self.obj_.fit(X_train, y_train)
            # Residuals on the held-out half
            scoring_residuals = y_calib - self.obj_.predict(X_calib)
            # The final model is refitted on the calibration half, and the
            # multipliers are optimized against that half.
            self.obj_.fit(X_calib, y_calib)
            base_predictions = self.obj_.predict(X_calib)
            y_target = y_calib
        else:
            self.obj_.fit(X, y)
            base_predictions = self.obj_.predict(X)
            scoring_residuals = y - base_predictions
            y_target = y

        self.scoring_residuals_ = scoring_residuals
        if self.scoring in ("studentized", "conformal-studentized"):
            self.student_multiplier_ = np.std(y_target, ddof=1) / np.sqrt(
                len(y_target)
            )

        scale = self._offset_scale(base_predictions, scoring_residuals)
        residuals_arg = None if self.scoring == "predictions" else scoring_residuals

        # Fit each quantile sequentially, lowest first
        self.offset_multipliers_ = []
        current_predictions = None
        for quantile in self.quantiles:
            multiplier = self._optimize_multiplier(
                y=y_target,
                base_predictions=base_predictions,
                prev_predictions=current_predictions,
                scoring_residuals=residuals_arg,
                quantile=quantile,
            )
            self.offset_multipliers_.append(multiplier)

            if current_predictions is None:
                # First quantile (lowest)
                current_predictions = base_predictions - multiplier * scale
            else:
                # Subsequent quantiles
                current_predictions = current_predictions + multiplier * scale

        return self

    def predict(self, X, return_pi=False):
        """Predict the target variable.

        Parameters:

            X: {array-like}, shape = [n_samples, n_features]
                Vectors to predict for, where n_samples is the number of
                samples and n_features is the number of features.

            return_pi: bool, default=False
                Whether to return the prediction intervals.

        Returns:

            The median quantile as an array when `return_pi` is False;
            otherwise a namedtuple with fields `mean` (base model
            predictions), `lower`, `upper` and `median`.
        """
        if self.obj_ is None or self.offset_multipliers_ is None:
            raise ValueError("Model not fitted yet.")

        base_predictions = self.obj_.predict(X)
        scale = self._offset_scale(base_predictions, self.scoring_residuals_)

        # Generate first quantile
        current_predictions = base_predictions - self.offset_multipliers_[0] * scale
        all_predictions = [current_predictions]

        # Generate remaining quantiles
        for multiplier in self.offset_multipliers_[1:]:
            current_predictions = current_predictions + multiplier * scale
            all_predictions.append(current_predictions)

        if not return_pi:
            return np.asarray(all_predictions[1])

        DescribeResult = namedtuple(
            "DescribeResult", ["mean", "lower", "upper", "median"]
        )
        # Return an instance rather than mutating the namedtuple class
        return DescribeResult(
            mean=base_predictions,
            lower=np.asarray(all_predictions[0]),
            upper=np.asarray(all_predictions[2]),
            median=np.asarray(all_predictions[1]),
        )
Quantile Regressor.
Parameters:
obj: base model (regression model)
The base regressor from which to build a
quantile regressor.
level: int, default=95
The level of the quantiles to compute.
scoring: str, default="predictions"
The scoring to use for the optimization and constructing
prediction intervals (predictions, residuals, conformal,
studentized, conformal-studentized).
Attributes:
obj_ : base model (regression model)
The base regressor from which to build a
quantile regressor.
offset_multipliers_ : list
The multipliers for the offset.
scoring_residuals_ : list
The residuals for the scoring.
student_multiplier_ : float
The multiplier for the student.
def fit(self, X: np.ndarray, y: np.ndarray):
    """Fit the base model, then one offset multiplier per quantile.

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number of samples and
            n_features is the number of features.
        y: array-like, shape = [n_samples]
            Target values.

    Returns:

        self: object
    """
    self.obj_ = clone(self.obj)
    uses_split = self.scoring in ("conformal", "conformal-studentized")
    is_studentized = self.scoring in ("studentized", "conformal-studentized")

    if uses_split:
        X_train, X_calib, y_train, y_calib = train_test_split(
            X, y, test_size=0.5, random_state=42
        )
        self.obj_.fit(X_train, y_train)
        # Residuals on the held-out half
        scoring_residuals = y_calib - self.obj_.predict(X_calib)
        # The model is refitted on the calibration half, which is also the
        # target the multipliers are optimized against.
        self.obj_.fit(X_calib, y_calib)
        base_predictions = self.obj_.predict(X_calib)
        target = y_calib
    else:
        self.obj_.fit(X, y)
        base_predictions = self.obj_.predict(X)
        scoring_residuals = y - base_predictions
        target = y

    # Quantity each multiplier is applied to
    if is_studentized:
        self.student_multiplier_ = np.std(target, ddof=1) / np.sqrt(len(target))
        scale = self.student_multiplier_
    else:
        self.scoring_residuals_ = scoring_residuals
        if self.scoring == "predictions":
            scale = np.abs(base_predictions)
        else:
            scale = np.std(scoring_residuals)

    # The "predictions" scoring does not hand residuals to the optimizer
    residuals_arg = None if self.scoring == "predictions" else scoring_residuals

    self.offset_multipliers_ = []
    running = None  # predictions of the most recently fitted quantile

    for q in self.quantiles:
        factor = self._optimize_multiplier(
            y=target,
            base_predictions=base_predictions,
            prev_predictions=running,
            scoring_residuals=residuals_arg,
            quantile=q,
        )
        self.offset_multipliers_.append(factor)

        if running is None:
            # Lowest quantile: step down from the base predictions
            running = base_predictions - factor * scale
        else:
            # Later quantiles: step up from the previous quantile
            running = running + factor * scale

    return self
Fit the model to the data.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number of samples and
n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
def predict(self, X, return_pi=False):
    """Predict the target variable.

    Parameters:

        X: {array-like}, shape = [n_samples, n_features]
            Vectors to predict for, where n_samples is the number of
            samples and n_features is the number of features.

        return_pi: bool, default=False
            Whether to return the prediction intervals.

    Returns:

        The median quantile as an array when `return_pi` is False;
        otherwise a namedtuple with fields `mean` (base model
        predictions), `lower`, `upper` and `median`.

    Raises:

        ValueError: if the model has not been fitted, or `self.scoring`
            is not a supported value.
    """
    if self.obj_ is None or self.offset_multipliers_ is None:
        raise ValueError("Model not fitted yet.")

    base_predictions = self.obj_.predict(X)

    # Quantity each fitted multiplier is applied to
    if self.scoring == "predictions":
        scale = np.abs(base_predictions)
    elif self.scoring in ("residuals", "conformal"):
        scale = np.std(self.scoring_residuals_)
    elif self.scoring in ("studentized", "conformal-studentized"):
        scale = self.student_multiplier_
    else:
        raise ValueError("Invalid argument 'scoring'")

    # Generate first quantile
    current_predictions = base_predictions - self.offset_multipliers_[0] * scale
    all_predictions = [current_predictions]

    # Generate remaining quantiles
    for multiplier in self.offset_multipliers_[1:]:
        current_predictions = current_predictions + multiplier * scale
        all_predictions.append(current_predictions)

    if not return_pi:
        return np.asarray(all_predictions[1])

    DescribeResult = namedtuple(
        "DescribeResult", ["mean", "lower", "upper", "median"]
    )
    # Return an instance rather than mutating the namedtuple class
    return DescribeResult(
        mean=base_predictions,
        lower=np.asarray(all_predictions[0]),
        upper=np.asarray(all_predictions[2]),
        median=np.asarray(all_predictions[1]),
    )
Predict the target variable.
Parameters:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number of samples and
n_features is the number of features.
return_pi: bool, default=False
Whether to return the prediction intervals.
class QuantileClassifier(BaseEstimator, ClassifierMixin):
    """
    Quantile Classifier.

    Wraps a `QuantileRegressor` (whose `predict` is forced to
    `return_pi=True`) inside a `SimpleMultitaskClassifier`.

    Parameters:

        obj: base model
            The base model from which to build a
            quantile classifier; it is handed to `QuantileRegressor`.

        level: int, default=95
            The level of the quantiles to compute.

        scoring: str, default="predictions"
            The scoring to use for the optimization and constructing
            prediction intervals (predictions, residuals, conformal,
            studentized, conformal-studentized).

    Attributes:

        obj_ : SimpleMultitaskClassifier
            The multitask classifier built on top of the quantile
            regressor; `fit`, `predict` and `predict_proba` delegate to it.

    """

    def __init__(self, obj, level=95, scoring="predictions"):
        assert scoring in (
            "predictions",
            "residuals",
            "conformal",
            "studentized",
            "conformal-studentized",
        ), (
            "scoring must be one of 'predictions', 'residuals', 'conformal', "
            "'studentized', 'conformal-studentized'"
        )
        self.obj = obj
        # Stored so that BaseEstimator.get_params() / sklearn.clone() can
        # read back every constructor argument.
        self.level = level
        self.scoring = scoring
        # `level` and `scoring` used to be dropped here, so the inner
        # regressor always ran with its own defaults.
        quantileregressor = QuantileRegressor(
            self.obj, level=level, scoring=scoring
        )
        quantileregressor.predict = partial(quantileregressor.predict, return_pi=True)
        self.obj_ = SimpleMultitaskClassifier(quantileregressor)

    def fit(self, X, y, **kwargs):
        """Fit the wrapped multitask classifier on (X, y) and return self."""
        self.obj_.fit(X, y, **kwargs)
        return self

    def predict(self, X, **kwargs):
        """Return the wrapped classifier's predictions for X."""
        return self.obj_.predict(X, **kwargs)

    def predict_proba(self, X, **kwargs):
        """Return the wrapped classifier's `predict_proba` output for X."""
        return self.obj_.predict_proba(X, **kwargs)
Quantile Classifier.
Parameters:
obj: base model (classification model)
The base classifier from which to build a
quantile classifier.
level: int, default=95
The level of the quantiles to compute.
scoring: str, default="predictions"
The scoring to use for the optimization and constructing
prediction intervals (predictions, residuals, conformal,
studentized, conformal-studentized).
Attributes:
obj_ : base model (classification model)
The base classifier from which to build a
quantile classifier.
offset_multipliers_ : list
The multipliers for the offset.
scoring_residuals_ : list
The residuals for the scoring.
student_multiplier_ : float
The multiplier for the student.
class RandomBagRegressor(RandomBag, RegressorMixin):
    """Randomized 'Bagging' Regression model

    Parameters:

        obj: object
            any object containing a method fit (obj.fit()) and a method predict
            (obj.predict())

        n_estimators: int
            number of base learners in the ensemble

        n_hidden_features: int
            number of nodes in the hidden layer

        activation_name: str
            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

        a: float
            hyperparameter for 'prelu' or 'elu' activation function

        nodes_sim: str
            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
            'uniform'

        bias: boolean
            indicates if the hidden layer contains a bias term (True) or not
            (False)

        dropout: float
            regularization parameter; (random) percentage of nodes dropped out
            of the training

        direct_link: boolean
            indicates if the original predictors are included (True) in model's
            fitting or not (False)

        n_clusters: int
            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
                no clustering)

        cluster_encode: bool
            defines how the variable containing clusters is treated (default is one-hot)
            if `False`, then labels are used, without one-hot encoding

        type_clust: str
            type of clustering method: currently k-means ('kmeans') or Gaussian
            Mixture Model ('gmm')

        type_scaling: a tuple of 3 strings
            scaling methods for inputs, hidden layer, and clustering respectively
            (and when relevant).
            Currently available: standardization ('std') or MinMax scaling ('minmax')

        col_sample: float
            percentage of covariates randomly chosen for training

        row_sample: float
            percentage of rows chosen for training, by stratified bootstrapping

        n_jobs: int or None
            `None` trains through `rbagloop_regression`; any other value is
            passed to `Parallel` (thread-preferring)

        seed: int
            reproducibility seed for nodes_sim=='uniform'

        verbose: int
            passed to `rbagloop_regression`; with `n_jobs` set, 1 wraps the
            training loop in a `tqdm` progress bar

        backend: str
            "cpu" or "gpu" or "tpu"

    Attributes:

        voter_: dict
            dictionary containing all the fitted base-learners


    Examples:

    ```python
    import numpy as np
    import nnetsauce as ns
    from sklearn.datasets import fetch_california_housing
    from sklearn.tree import DecisionTreeRegressor
    from sklearn.model_selection import train_test_split

    X, y = fetch_california_housing(return_X_y=True, as_frame=False)

    # split data into training set and test set
    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        test_size=0.2, random_state=13)

    # Requires further tuning
    obj = DecisionTreeRegressor(max_depth=3, random_state=123)
    obj2 = ns.RandomBagRegressor(obj=obj, direct_link=False,
                                n_estimators=50,
                                col_sample=0.9, row_sample=0.9,
                                dropout=0, n_clusters=0, verbose=1)

    obj2.fit(X_train, y_train)

    print(np.sqrt(obj2.score(X_test, y_test))) # RMSE

    ```

    """

    # construct the object -----

    def __init__(
        self,
        obj,
        n_estimators=10,
        n_hidden_features=1,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=False,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        col_sample=1,
        row_sample=1,
        n_jobs=None,
        seed=123,
        verbose=1,
        backend="cpu",
    ):
        super().__init__(
            obj=obj,
            n_estimators=n_estimators,
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            col_sample=col_sample,
            row_sample=row_sample,
            seed=seed,
            backend=backend,
        )

        self.type_fit = "regression"
        self.verbose = verbose
        self.n_jobs = n_jobs
        self.voter_ = {}

    def fit(self, X, y, **kwargs):
        """Train the ensemble of base learners on (X, y).

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            **kwargs: additional parameters forwarded to each base
                learner's `fit` (used on the `n_jobs` path only)

        Returns:

            self: object

        """

        template = CustomRegressor(
            self.obj,
            n_hidden_features=self.n_hidden_features,
            activation_name=self.activation_name,
            a=self.a,
            nodes_sim=self.nodes_sim,
            bias=self.bias,
            dropout=self.dropout,
            direct_link=self.direct_link,
            n_clusters=self.n_clusters,
            type_clust=self.type_clust,
            type_scaling=self.type_scaling,
            col_sample=self.col_sample,
            row_sample=self.row_sample,
            seed=self.seed,
        )

        if self.n_jobs is None:
            # Sequential training
            self.voter_ = rbagloop_regression(
                template, X, y, self.n_estimators, self.verbose, self.seed
            )
        else:
            # Parallel training (flagged "buggy" by the original author)
            def train_member(m):
                member = deepcopy(template)
                member.set_params(seed=self.seed + m * 1000)
                member.fit(X, y, **kwargs)
                return member

            indices = range(self.n_estimators)
            if self.verbose == 1:
                indices = tqdm(indices)

            fitted = Parallel(n_jobs=self.n_jobs, prefer="threads")(
                delayed(train_member)(m) for m in indices
            )
            self.voter_ = dict(enumerate(fitted))

        self.n_estimators = len(self.voter_)

        return self

    def predict(self, X, weights=None, **kwargs):
        """Predict for test data X.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Vectors to predict for, where n_samples is the number
                of samples and n_features is the number of features.

            weights: optional, indexable by the keys of `voter_`
                when given, stored on `self.weights` and used for a
                weighted sum of the base learners' predictions;
                otherwise a plain average is returned

            **kwargs: accepted for interface compatibility; not used here

        Returns:

            estimates for test data: {array-like}

        """

        if weights is not None:
            self.weights = weights

        members = self.voter_
        n_members = len(members)

        assert n_members > 0, "no estimator found in `RandomBag` ensemble"

        total = 0

        if weights is None:
            for member in members.values():
                total += member.predict(X)
            return total / n_members

        for key, member in members.items():
            total += weights[key] * member.predict(X)

        return total
Randomized 'Bagging' Regression model
Parameters:
obj: object
any object containing a method fit (obj.fit()) and a method predict
(obj.predict())
n_estimators: int
number of base learners in the ensemble
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
col_sample: float
percentage of covariates randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
seed: int
reproducibility seed for nodes_sim=='uniform'
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
voter_: dict
dictionary containing all the fitted base-learners
Examples:
import numpy as np
import nnetsauce as ns
from sklearn.datasets import fetch_california_housing
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
X, y = fetch_california_housing(return_X_y=True, as_frame=False)
# split data into training set and test set
X_train, X_test, y_train, y_test = train_test_split(X, y,
test_size=0.2, random_state=13)
# Requires further tuning
obj = DecisionTreeRegressor(max_depth=3, random_state=123)
obj2 = ns.RandomBagRegressor(obj=obj, direct_link=False,
n_estimators=50,
col_sample=0.9, row_sample=0.9,
dropout=0, n_clusters=0, verbose=1)
obj2.fit(X_train, y_train)
print(np.sqrt(obj2.score(X_test, y_test))) # RMSE
def fit(self, X, y, **kwargs):
    """Train the ensemble of randomized base learners on (X, y).

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features.

        y: array-like, shape = [n_samples]
            Target values.

        **kwargs: additional parameters; they are forwarded to each base
            learner's `fit` when `n_jobs` is set (the sequential helper
            does not receive them)

    Returns:

        self: object

    """
    # Template learner: every ensemble member is derived from it.
    learner_settings = dict(
        n_hidden_features=self.n_hidden_features,
        activation_name=self.activation_name,
        a=self.a,
        nodes_sim=self.nodes_sim,
        bias=self.bias,
        dropout=self.dropout,
        direct_link=self.direct_link,
        n_clusters=self.n_clusters,
        type_clust=self.type_clust,
        type_scaling=self.type_scaling,
        col_sample=self.col_sample,
        row_sample=self.row_sample,
        seed=self.seed,
    )
    template = CustomRegressor(self.obj, **learner_settings)

    if self.n_jobs is None:
        # Sequential training is delegated to the looping helper.
        self.voter_ = rbagloop_regression(
            template, X, y, self.n_estimators, self.verbose, self.seed
        )
    else:
        # Parallel training: each member is an independent copy of the
        # template, reseeded from its index.
        def _train_member(index):
            member = deepcopy(template)
            member.set_params(seed=self.seed + index * 1000)
            member.fit(X, y, **kwargs)
            return member

        member_ids = range(self.n_estimators)
        if self.verbose == 1:
            # wrap the member indices in a progress bar
            member_ids = tqdm(member_ids)

        trained = Parallel(n_jobs=self.n_jobs, prefer="threads")(
            delayed(_train_member)(index) for index in member_ids
        )
        self.voter_ = dict(enumerate(trained))

    # Keep the advertised ensemble size in sync with what was fitted.
    self.n_estimators = len(self.voter_)
    return self
Fit Random 'Bagging' model to training data (X, y).
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
def predict(self, X, weights=None, **kwargs):
    """Predict for test data X by combining the fitted base learners.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Vectors to predict, where n_samples is the number
            of samples and n_features is the number of features.

        weights: optional container indexed by the keys of `self.voter_`;
            when given, the prediction is the weighted sum of the base
            learners' predictions (no normalization is applied) and the
            weights are stored in `self.weights`. When omitted, the plain
            average is returned.

        **kwargs: accepted for interface compatibility; not used here

    Returns:

        estimates for test data: {array-like}

    """
    if weights is not None:
        # remember the weights used for the latest prediction
        self.weights = weights

    learners = self.voter_
    n_learners = len(learners)

    assert n_learners > 0, "no estimator found in `RandomBag` ensemble"

    total = 0

    if weights is None:
        # unweighted case: arithmetic mean of the individual predictions
        for learner in learners.values():
            total += learner.predict(X)
        return total / n_learners

    # weighted case: each learner's weight is looked up under its own key
    for key, learner in learners.items():
        total += weights[key] * learner.predict(X)
    return total
Predict for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
estimates for test data: {array-like}
class RandomBagClassifier(RandomBag, ClassifierMixin):
    """Randomized 'Bagging' Classification model

    Parameters:

        obj: object
            any object containing a method fit (obj.fit()) and a method predict
            (obj.predict())

        n_estimators: int
            number of base learners in the bagging ensemble

        n_hidden_features: int
            number of nodes in the hidden layer

        activation_name: str
            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

        a: float
            hyperparameter for 'prelu' or 'elu' activation function

        nodes_sim: str
            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
            'uniform'

        bias: boolean
            indicates if the hidden layer contains a bias term (True) or not
            (False)

        dropout: float
            regularization parameter; (random) percentage of nodes dropped out
            of the training

        direct_link: boolean
            indicates if the original predictors are included (True) in model's
            fitting or not (False)

        n_clusters: int
            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
                no clustering)

        cluster_encode: bool
            defines how the variable containing clusters is treated (default is one-hot)
            if `False`, then labels are used, without one-hot encoding

        type_clust: str
            type of clustering method: currently k-means ('kmeans') or Gaussian
            Mixture Model ('gmm')

        type_scaling: a tuple of 3 strings
            scaling methods for inputs, hidden layer, and clustering respectively
            (and when relevant).
            Currently available: standardization ('std') or MinMax scaling ('minmax')

        col_sample: float
            percentage of covariates randomly chosen for training

        row_sample: float
            percentage of rows chosen for training, by stratified bootstrapping

        n_jobs: int or None
            `None` trains the base learners sequentially; any other value is
            passed to `Parallel(n_jobs=..., prefer="threads")`

        seed: int
            reproducibility seed for nodes_sim=='uniform'

        verbose: int
            1 displays a progress bar on the parallel paths; the value is also
            forwarded to the sequential training helper

        backend: str
            "cpu" or "gpu" or "tpu"

    Attributes:

        voter_: dict
            dictionary containing all the fitted base-learners

        classes_: array
            sorted unique values of `y` seen in `fit`

        n_classes_: int
            number of distinct values of `y` seen in `fit`


    Examples:

    See also [https://github.com/Techtonique/nnetsauce/blob/master/examples/randombag_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/randombag_classification.py)

    ```python
    import numpy as np
    import nnetsauce as ns
    from sklearn.datasets import load_breast_cancer
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.model_selection import train_test_split
    from sklearn import metrics
    from time import time


    breast_cancer = load_breast_cancer()
    Z = breast_cancer.data
    t = breast_cancer.target
    np.random.seed(123)
    X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)

    # decision tree
    clf = DecisionTreeClassifier(max_depth=2, random_state=123)
    fit_obj = ns.RandomBagClassifier(clf, n_hidden_features=2,
                                    direct_link=True,
                                    n_estimators=100,
                                    col_sample=0.9, row_sample=0.9,
                                    dropout=0.3, n_clusters=0, verbose=1)

    start = time()
    fit_obj.fit(X_train, y_train)
    print(f"Elapsed {time() - start}")

    print(fit_obj.score(X_test, y_test))
    print(fit_obj.score(X_test, y_test, scoring="roc_auc"))

    start = time()
    preds = fit_obj.predict(X_test)
    print(f"Elapsed {time() - start}")
    print(metrics.classification_report(preds, y_test))
    ```

    """

    # construct the object -----

    def __init__(
        self,
        obj,
        n_estimators=10,
        n_hidden_features=1,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=False,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        col_sample=1,
        row_sample=1,
        n_jobs=None,
        seed=123,
        verbose=1,
        backend="cpu",
    ):
        super().__init__(
            obj=obj,
            n_estimators=n_estimators,
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            col_sample=col_sample,
            row_sample=row_sample,
            seed=seed,
            backend=backend,
        )

        self.type_fit = "classification"
        self.verbose = verbose
        self.n_jobs = n_jobs
        self.voter_ = {}

    def fit(self, X, y, **kwargs):
        """Fit Random 'Bagging' model to training data (X, y).

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            **kwargs: additional parameters; they are forwarded to each base
                learner's `fit` when `n_jobs` is set (the sequential helper
                does not receive them)

        Returns:

            self: object

        """

        assert mx.is_factor(y), "y must contain only integers"

        # Class bookkeeping is done once, before training, so that it is
        # available whichever training path is taken below.
        self.classes_ = np.unique(y)
        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
        self.n_classes = self.n_classes_

        base_learner = CustomClassifier(
            self.obj,
            n_hidden_features=self.n_hidden_features,
            activation_name=self.activation_name,
            a=self.a,
            nodes_sim=self.nodes_sim,
            bias=self.bias,
            dropout=self.dropout,
            direct_link=self.direct_link,
            n_clusters=self.n_clusters,
            type_clust=self.type_clust,
            type_scaling=self.type_scaling,
            col_sample=self.col_sample,
            row_sample=self.row_sample,
            seed=self.seed,
        )

        # 1 - Sequential training -----

        if self.n_jobs is None:
            self.voter_ = rbagloop_classification(
                base_learner, X, y, self.n_estimators, self.verbose, self.seed
            )

            self.n_estimators = len(self.voter_)

            return self

        # 2 - Parallel training -----

        def fit_estimators(m):
            # independent copy of the template, reseeded from its index
            base_learner__ = deepcopy(base_learner)
            base_learner__.set_params(seed=self.seed + m * 1000)
            base_learner__.fit(X, y, **kwargs)
            return base_learner__

        indices = range(self.n_estimators)
        if self.verbose == 1:
            indices = tqdm(indices)

        voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")(
            delayed(fit_estimators)(m) for m in indices
        )

        self.voter_ = dict(enumerate(voters_list))

        self.n_estimators = len(self.voter_)
        return self

    def predict(self, X, weights=None, **kwargs):
        """Predict test data X.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Vectors to predict, where n_samples is the number
                of samples and n_features is the number of features.

            weights: optional weights, passed on to `predict_proba`

            **kwargs: additional parameters, passed on to `predict_proba`

        Returns:

            model predictions: {array-like}; the column index of the highest
            ensemble probability for each row

        """
        return self.predict_proba(X, weights, **kwargs).argmax(axis=1)

    def predict_proba(self, X, weights=None, **kwargs):
        """Predict probabilities for test data X.

        Without `weights`, the result is the average of the base learners'
        probabilities. A base learner whose `predict_proba` raises is
        skipped with a `RuntimeWarning`, and the average is taken over the
        learners that succeeded.

        With `weights`, the result is the weighted sum of the base learners'
        probabilities (no normalization); any base-learner failure is
        propagated, and the weights are stored in `self.weights`.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Vectors to predict, where n_samples is the number
                of samples and n_features is the number of features.

            weights: optional container indexed by the keys of `self.voter_`

            **kwargs: accepted for interface compatibility; not used here

        Returns:

            probability estimates for test data: {array-like}

        Raises:

            AssertionError: if the ensemble contains no estimator
            RuntimeError: if `weights` is None and every base learner failed

        """
        import warnings  # local import keeps this method self-contained

        assert len(self.voter_) > 0, "no estimator found in `RandomBag` ensemble"

        best_effort = weights is None
        if not best_effort:
            self.weights = weights

        def predict_estimator(key):
            try:
                return self.voter_[key].predict_proba(X)
            except Exception as err:
                if not best_effort:
                    # a missing term would silently distort a weighted sum
                    raise
                warnings.warn(
                    f"base learner {key!r} failed in predict_proba "
                    f"and is skipped: {err!r}",
                    RuntimeWarning,
                )
                return None

        keys = list(self.voter_)

        if self.n_jobs is None:
            # lazy: one prediction is held at a time while accumulating
            results = (predict_estimator(key) for key in keys)
        else:
            iterator = tqdm(keys) if self.verbose == 1 else keys
            results = Parallel(n_jobs=self.n_jobs, prefer="threads")(
                delayed(predict_estimator)(key) for key in iterator
            )

        ensemble_proba = 0
        n_used = 0
        for key, proba in zip(keys, results):
            if proba is None:
                continue
            if best_effort:
                ensemble_proba += proba
            else:
                ensemble_proba += weights[key] * proba
            n_used += 1

        if not best_effort:
            return ensemble_proba

        if n_used == 0:
            raise RuntimeError(
                "every base learner failed in predict_proba; see warnings"
            )

        # divide by the learners actually used so that rows still sum to 1
        return ensemble_proba / n_used
Randomized 'Bagging' Classification model
Parameters:
obj: object
any object containing a method fit (obj.fit()) and a method predict
(obj.predict())
n_estimators: int
number of base learners in the bagging ensemble
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
col_sample: float
percentage of covariates randomly chosen for training
row_sample: float
percentage of rows chosen for training, by stratified bootstrapping
seed: int
reproducibility seed for nodes_sim=='uniform'
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
voter_: dict
dictionary containing all the fitted base-learners
Examples:
See also https://github.com/Techtonique/nnetsauce/blob/master/examples/randombag_classification.py
import numpy as np
import nnetsauce as ns
from sklearn.datasets import load_breast_cancer
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time
breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target
np.random.seed(123)
X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)
# decision tree
clf = DecisionTreeClassifier(max_depth=2, random_state=123)
fit_obj = ns.RandomBagClassifier(clf, n_hidden_features=2,
direct_link=True,
n_estimators=100,
col_sample=0.9, row_sample=0.9,
dropout=0.3, n_clusters=0, verbose=1)
start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")
print(fit_obj.score(X_test, y_test))
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
start = time()
preds = fit_obj.predict(X_test)
print(f"Elapsed {time() - start}")
print(metrics.classification_report(preds, y_test))
def fit(self, X, y, **kwargs):
    """Fit Random 'Bagging' model to training data (X, y).

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features.

        y: array-like, shape = [n_samples]
            Target values.

        **kwargs: additional parameters; they are forwarded to each base
            learner's `fit` when `n_jobs` is set (the sequential helper
            does not receive them)

    Returns:

        self: object

    """

    assert mx.is_factor(y), "y must contain only integers"

    # Class bookkeeping is done once, before training, so that `classes_`
    # is available whichever training path is taken below.
    self.classes_ = np.unique(y)
    self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
    self.n_classes = self.n_classes_

    base_learner = CustomClassifier(
        self.obj,
        n_hidden_features=self.n_hidden_features,
        activation_name=self.activation_name,
        a=self.a,
        nodes_sim=self.nodes_sim,
        bias=self.bias,
        dropout=self.dropout,
        direct_link=self.direct_link,
        n_clusters=self.n_clusters,
        type_clust=self.type_clust,
        type_scaling=self.type_scaling,
        col_sample=self.col_sample,
        row_sample=self.row_sample,
        seed=self.seed,
    )

    # 1 - Sequential training -----

    if self.n_jobs is None:
        self.voter_ = rbagloop_classification(
            base_learner, X, y, self.n_estimators, self.verbose, self.seed
        )

        self.n_estimators = len(self.voter_)

        return self

    # 2 - Parallel training -----

    def fit_estimators(m):
        # independent copy of the template, reseeded from its index
        base_learner__ = deepcopy(base_learner)
        base_learner__.set_params(seed=self.seed + m * 1000)
        base_learner__.fit(X, y, **kwargs)
        return base_learner__

    indices = range(self.n_estimators)
    if self.verbose == 1:
        indices = tqdm(indices)

    voters_list = Parallel(n_jobs=self.n_jobs, prefer="threads")(
        delayed(fit_estimators)(m) for m in indices
    )

    self.voter_ = dict(enumerate(voters_list))

    self.n_estimators = len(self.voter_)
    return self
Fit Random 'Bagging' model to training data (X, y).
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
def predict(self, X, weights=None, **kwargs):
    """Predict test data X.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Vectors to predict, where n_samples is the number
            of samples and n_features is the number of features.

        weights: optional weights, passed on to `predict_proba`

        **kwargs: additional parameters, passed on to `predict_proba`

    Returns:

        model predictions: {array-like}; for each row, the column index
        of the highest ensemble probability

    """
    # ensemble probabilities first, then the best-scoring column per row
    ensemble_probas = self.predict_proba(X, weights, **kwargs)
    best_columns = ensemble_probas.argmax(axis=1)
    return best_columns
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
def predict_proba(self, X, weights=None, **kwargs):
    """Predict probabilities for test data X.

    Without `weights`, the result is the average of the base learners'
    probabilities. A base learner whose `predict_proba` raises is skipped
    with a `RuntimeWarning`, and the average is taken over the learners
    that succeeded.

    With `weights`, the result is the weighted sum of the base learners'
    probabilities (no normalization); any base-learner failure is
    propagated, and the weights are stored in `self.weights`.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Vectors to predict, where n_samples is the number
            of samples and n_features is the number of features.

        weights: optional container indexed by the keys of `self.voter_`

        **kwargs: accepted for interface compatibility; not used here

    Returns:

        probability estimates for test data: {array-like}

    Raises:

        AssertionError: if the ensemble contains no estimator
        RuntimeError: if `weights` is None and every base learner failed

    """
    import warnings  # local import keeps this method self-contained

    assert len(self.voter_) > 0, "no estimator found in `RandomBag` ensemble"

    best_effort = weights is None
    if not best_effort:
        self.weights = weights

    def predict_estimator(key):
        try:
            return self.voter_[key].predict_proba(X)
        except Exception as err:
            if not best_effort:
                # a missing term would silently distort a weighted sum
                raise
            warnings.warn(
                f"base learner {key!r} failed in predict_proba "
                f"and is skipped: {err!r}",
                RuntimeWarning,
            )
            return None

    keys = list(self.voter_)

    if self.n_jobs is None:
        # lazy: one prediction is held at a time while accumulating
        results = (predict_estimator(key) for key in keys)
    else:
        iterator = tqdm(keys) if self.verbose == 1 else keys
        results = Parallel(n_jobs=self.n_jobs, prefer="threads")(
            delayed(predict_estimator)(key) for key in iterator
        )

    ensemble_proba = 0
    n_used = 0
    for key, proba in zip(keys, results):
        if proba is None:
            continue
        if best_effort:
            ensemble_proba += proba
        else:
            ensemble_proba += weights[key] * proba
        n_used += 1

    if not best_effort:
        return ensemble_proba

    if n_used == 0:
        raise RuntimeError(
            "every base learner failed in predict_proba; see warnings"
        )

    # divide by the learners actually used so that rows still sum to 1
    return ensemble_proba / n_used
Predict probabilities for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
probability estimates for test data: {array-like}
class RegressorUpdater(BaseEstimator, RegressorMixin):
    """
    Update a regression model with new observations

    Parameters
    ----------
    regr: object
        A regression model with a coef_ attribute
    alpha: float
        Updating factor's exponent

    Attributes
    ----------
    n_obs_: int
        Number of observations
    coef_: np.ndarray
        Coefficients of the model
    updating_factor_: float
        Updating factor

    """

    def __init__(self, regr, alpha=0.5):
        self.regr = regr
        self.alpha = alpha
        self.n_obs_ = None
        self.coef_ = None
        self.updating_factor_ = None
        # If the wrapped model already carries coefficients, pick them up
        # now, together with the sample count for nnetsauce models.
        try:
            self.coef_ = self.regr.coef_
            if isinstance(self.regr, Base):
                self.n_obs_ = self.regr.scaler_.n_samples_seen_
        except AttributeError:
            pass

    def fit(self, X, y, **kwargs):
        """Fit the wrapped model if needed and record its state.

        Parameters
        ----------
        X: array-like with a `shape` attribute
            Training data
        y: array-like
            Target values
        **kwargs:
            Forwarded to `regr.fit` for a `CustomRegressor` only

        Returns
        -------
        self: object
        """

        if isinstance(self.regr, CustomRegressor):  # nnetsauce model not deep ---
            # NOTE(review): if `check_is_fitted` is scikit-learn's, it returns
            # None or raises and never returns False, so this branch would
            # never be taken -- confirm which helper is imported.
            if check_is_fitted(self.regr) == False:  # noqa: E712
                self.regr.fit(X, y, **kwargs)
                self.n_obs_ = X.shape[0]
            else:
                self.n_obs_ = self.regr.scaler_.n_samples_seen_
            if hasattr(self.regr, "coef_"):
                self.coef_ = self.regr.coef_
            return self

        if not hasattr(self.regr, "coef_"):  # unfitted model ---
            # the model is trained exactly once
            self.regr.fit(X, y)
            self.n_obs_ = X.shape[0]
            if hasattr(self.regr, "stacked_obj"):
                self.coef_ = self.regr.stacked_obj.coef_
            else:
                self.coef_ = self.regr.coef_
            return self

        # model already has coefficients: only synchronise the bookkeeping
        self.n_obs_ = X.shape[0]
        self.coef_ = self.regr.coef_
        return self

    def predict(self, X):
        """Return the wrapped model's predictions for X."""
        return self.regr.predict(X)

    def partial_fit(self, X, y):
        """Update the coefficients with one new observation (X, y).

        Parameters
        ----------
        X: array-like
            A single observation (1-D, or 2-D with exactly one row)
        y: float or array-like
            Response for that observation

        Returns
        -------
        self: object
        """

        assert hasattr(
            self.regr, "coef_"
        ), "model must be fitted first (i.e have 'coef_' attribute)"
        assert (
            self.n_obs_ is not None
        ), "model must be fitted first (i.e have 'n_obs_' attribute)"

        if len(X.shape) == 1:
            X = X.reshape(1, -1)

        assert X.shape[0] == 1, "X must have one row"

        self.updating_factor_ = self.n_obs_ ** (-self.alpha)

        if isinstance(self.regr, CustomRegressor):
            # nnetsauce CustomRegressor: update in the transformed feature space
            newX = self.regr.cook_test_set(X=X)
        else:
            newX = X

        # np.asarray accepts both DataFrames and arrays, so the flattening
        # does not depend on what type `cook_test_set` returned
        newx = np.asarray(newX).ravel()

        new_coef = self.regr.coef_ + self.updating_factor_ * np.dot(
            newx, y - np.dot(newx, self.regr.coef_)
        )
        self.regr.coef_ = _update_mean(self.regr.coef_, self.n_obs_, new_coef)
        self.coef_ = deepcopy(self.regr.coef_)
        self.n_obs_ += 1
        return self
Update a regression model with new observations
Parameters
regr: object A regression model with a coef_ attribute alpha: float Updating factor's exponent
Attributes
n_obs_: int Number of observations coef_: np.ndarray Coefficients of the model updating_factor_: float Updating factor
def fit(self, X, y, **kwargs):
    """Fit the wrapped model if needed and record its state.

    Parameters
    ----------
    X: array-like with a `shape` attribute
        Training data
    y: array-like
        Target values
    **kwargs:
        Forwarded to `regr.fit` for a `CustomRegressor` only

    Returns
    -------
    self: object
    """

    if isinstance(self.regr, CustomRegressor):  # nnetsauce model not deep ---
        # NOTE(review): if `check_is_fitted` is scikit-learn's, it returns
        # None or raises and never returns False, so this branch would
        # never be taken -- confirm which helper is imported.
        if check_is_fitted(self.regr) == False:  # noqa: E712
            self.regr.fit(X, y, **kwargs)
            self.n_obs_ = X.shape[0]
        else:
            self.n_obs_ = self.regr.scaler_.n_samples_seen_
        if hasattr(self.regr, "coef_"):
            self.coef_ = self.regr.coef_
        return self

    if not hasattr(self.regr, "coef_"):  # unfitted model ---
        # the model is trained exactly once
        self.regr.fit(X, y)
        self.n_obs_ = X.shape[0]
        if hasattr(self.regr, "stacked_obj"):
            self.coef_ = self.regr.stacked_obj.coef_
        else:
            self.coef_ = self.regr.coef_
        return self

    # model already has coefficients: only synchronise the bookkeeping
    self.n_obs_ = X.shape[0]
    self.coef_ = self.regr.coef_
    return self
class ClassifierUpdater(BaseEstimator, ClassifierMixin):
    """
    Update a classification model with new observations

    `fit`, `predict` and `partial_fit` are placeholders: each one raises
    `NotImplementedError`.

    Parameters
    ----------
    clf: object
        A classification model with a coef_ attribute
    alpha: float
        Updating factor's exponent

    Attributes
    ----------
    n_obs_: int
        Number of observations
    coef_: np.ndarray
        Coefficients of the model
    updating_factor_: float
        Updating factor

    """

    def __init__(self, clf, alpha=0.5):
        self.clf = clf
        self.alpha = alpha
        self.n_obs_ = None
        self.coef_ = None
        self.updating_factor_ = None
        # If the wrapped model already carries coefficients, pick them up
        # now, together with the sample count for nnetsauce models.
        try:
            self.coef_ = self.clf.coef_
            if isinstance(self.clf, Base):
                self.n_obs_ = self.clf.scaler_.n_samples_seen_
        except AttributeError:
            pass

    # NOTE: the three methods below previously carried a draft body after
    # the `raise` (a copy of the RegressorUpdater logic with `clf` in place
    # of `regr`); it could never execute and has been removed.

    def fit(self, X, y, **kwargs):
        """Not implemented; always raises `NotImplementedError`."""
        raise NotImplementedError("fit method is not implemented for ClassifierUpdater")

    def predict(self, X):
        """Not implemented; always raises `NotImplementedError`."""
        raise NotImplementedError(
            "predict method is not implemented for ClassifierUpdater"
        )

    def partial_fit(self, X, y):
        """Not implemented; always raises `NotImplementedError`."""
        raise NotImplementedError(
            "partial_fit method is not implemented for ClassifierUpdater"
        )
Update a classification model with new observations
Parameters
clf: object A classification model with a coef_ attribute alpha: float Updating factor's exponent
Attributes
n_obs_: int Number of observations coef_: np.ndarray Coefficients of the model updating_factor_: float Updating factor
def fit(self, X, y, **kwargs):
    """Not implemented; always raises `NotImplementedError`.

    The draft body that used to follow the `raise` (a copy of the
    regressor updater's fitting logic with `clf` in place of `regr`) could
    never execute and has been removed.
    """
    raise NotImplementedError("fit method is not implemented for ClassifierUpdater")
class RidgeRegressor(Base, RegressorMixin):
    """Basic Ridge Regression model.

    Parameters:

        lambda_: float or array-like
            Ridge regularization parameter(s). Default is 0. When several
            values are given, one coefficient vector is fitted per value.

        n_hidden_features, activation_name, a, nodes_sim, bias, dropout,
        direct_link, n_clusters, cluster_encode, type_clust, type_scaling,
        col_sample, row_sample, seed, backend:
            forwarded unchanged to the parent class constructor.

    Attributes (set by `fit`):

        coef_: array of shape (n_features,) for a scalar `lambda_`,
            (n_features, n_lambdas) otherwise

        X_mean_, X_scale_: per-column mean and scale of the training inputs

        y_mean_: float
            average response

        HKB_, LW_, GCV_: model selection criteria, one value per lambda
    """

    def __init__(
        self,
        lambda_=0.0,
        n_hidden_features=0,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=0,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        col_sample=1,
        row_sample=1,
        seed=123,
        backend="cpu",
    ):
        super().__init__(
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            col_sample=col_sample,
            row_sample=row_sample,
            seed=seed,
            backend=backend,
        )
        self.lambda_ = lambda_

    def _center_scale_xy(self, X, y):
        """Center X and y, scale X.

        Returns:
            (X_scaled, y_centered, X_mean, y_mean, X_scale)
        """
        n = X.shape[0]

        # Center X and y
        X_mean = np.mean(X, axis=0)
        y_mean = np.mean(y)
        X_centered = X - X_mean
        y_centered = y - y_mean

        # Scale X by the root mean square of each centered column
        X_scale = np.sqrt(np.sum(X_centered**2, axis=0) / n)
        # Constant columns have zero scale: avoid division by zero
        X_scale = np.where(X_scale == 0, 1.0, X_scale)
        X_scaled = X_centered / X_scale

        return X_scaled, y_centered, X_mean, y_mean, X_scale

    def fit(self, X, y):
        """Fit Ridge regression model.

        Parameters:
            X : array-like of shape (n_samples, n_features)
                Training data
            y : array-like of shape (n_samples,)
                Target values (a 2-D y is flattened)

        Returns:
            self : returns an instance of self.
        """
        # Ensure numpy arrays
        X = np.asarray(X)
        y = np.asarray(y)
        if y.ndim == 2:
            y = y.ravel()

        # Center and scale
        X_scaled, y_centered, self.X_mean_, self.y_mean_, self.X_scale_ = (
            self._center_scale_xy(X, y)
        )

        # SVD decomposition
        U, d, Vt = np.linalg.svd(X_scaled, full_matrices=False)
        rhs = np.dot(U.T, y_centered)
        d2 = d**2

        single = np.isscalar(self.lambda_)
        lambdas = [self.lambda_] if single else list(self.lambda_)

        # One coefficient vector per lambda, mapped back to the original
        # scale of X
        coefs = [
            np.dot(Vt.T, (d * rhs) / (d2 + lam)) / self.X_scale_ for lam in lambdas
        ]
        self.coef_ = coefs[0] if single else np.array(coefs).T

        # Compute GCV, HKB and LW criteria
        # NOTE(review): predict() goes through self.cook_test_set although
        # fit never calls cook_training_set, and it does not subtract
        # X_mean_ itself -- confirm cook_test_set covers both.
        y_pred = np.asarray(self.predict(X))
        n, p = X.shape

        # Select the residual shape from y_pred's rank instead of a
        # try/except: plain broadcasting silently pairs the wrong axes when
        # n_samples == n_lambdas.
        if y_pred.ndim == 1:
            resid = y - y_pred
        else:
            resid = y[:, np.newaxis] - y_pred

        # axis=0 keeps every criterion per-lambda (a scalar for one lambda)
        rss = np.sum(resid**2, axis=0)
        s2 = rss / (n - p)

        self.HKB_ = (p - 2) * s2 / np.sum(self.coef_**2, axis=0)
        self.LW_ = (p - 2) * s2 * n / np.sum(y_pred**2, axis=0)

        # GCV pairs each lambda with the residuals of its own fit
        edf = np.array([np.sum(d2 / (d2 + lam)) for lam in lambdas])
        gcv = rss / (n - edf) ** 2
        self.GCV_ = gcv[0] if single else gcv

        return self

    def predict(self, X):
        """Predict using the Ridge regression model.

        Parameters:
            X : array-like of shape (n_samples, n_features)
                Samples to predict for

        Returns:
            y_pred : array-like of shape (n_samples,) for a single lambda,
                (n_samples, n_lambdas) for several lambdas
        """
        X = self.cook_test_set(X)

        # The computation is the same for every backend; the backend is
        # only forwarded to the matrix product.
        if np.isscalar(self.lambda_):
            return (
                mo.safe_sparse_dot(X, self.coef_, backend=self.backend)
                + self.y_mean_
            )

        return jnp.array(
            [
                mo.safe_sparse_dot(X, coef, backend=self.backend) + self.y_mean_
                for coef in self.coef_.T
            ]
        ).T

    def decision_function(self, X):
        """Compute the decision function of X.

        Same as `predict` without adding `y_mean_`.

        Parameters:
            X : array-like of shape (n_samples, n_features)
                Samples

        Returns:
            decision : array-like of shape (n_samples,) or (n_samples, n_lambdas)
                Decision function of the input samples. The order of outputs is
                the same as that of the provided lambda_ values. For a single
                lambda, returns array of shape (n_samples,). For multiple
                lambdas, returns array of shape (n_samples, n_lambdas).
        """
        X = self.cook_test_set(X)

        if np.isscalar(self.lambda_):
            return mo.safe_sparse_dot(X, self.coef_, backend=self.backend)

        # numpy container on cpu, jax container on the other backends
        stack = np.array if self.backend == "cpu" else jnp.array
        return stack(
            [
                mo.safe_sparse_dot(X, coef, backend=self.backend)
                for coef in self.coef_.T
            ]
        ).T
Basic Ridge Regression model.
Parameters: lambda_: float or array-like Ridge regularization parameter(s). Default is 0.
def fit(self, X, y):
    """Fit Ridge regression model.

    Parameters:
        X : array-like of shape (n_samples, n_features)
            Training data
        y : array-like of shape (n_samples,)
            Target values (a 2-D y is flattened)

    Returns:
        self : returns an instance of self.

    Sets `coef_`, `X_mean_`, `y_mean_`, `X_scale_` and the criteria
    `HKB_`, `LW_`, `GCV_` (one value per lambda).
    """
    # Ensure numpy arrays
    X = np.asarray(X)
    y = np.asarray(y)
    if y.ndim == 2:
        y = y.ravel()

    # Center and scale
    X_scaled, y_centered, self.X_mean_, self.y_mean_, self.X_scale_ = (
        self._center_scale_xy(X, y)
    )

    # SVD decomposition
    U, d, Vt = np.linalg.svd(X_scaled, full_matrices=False)
    rhs = np.dot(U.T, y_centered)
    d2 = d**2

    single = np.isscalar(self.lambda_)
    lambdas = [self.lambda_] if single else list(self.lambda_)

    # One coefficient vector per lambda, mapped back to the original scale
    # of X
    coefs = [
        np.dot(Vt.T, (d * rhs) / (d2 + lam)) / self.X_scale_ for lam in lambdas
    ]
    self.coef_ = coefs[0] if single else np.array(coefs).T

    # Compute GCV, HKB and LW criteria
    y_pred = np.asarray(self.predict(X))
    n, p = X.shape

    # Select the residual shape from y_pred's rank instead of a try/except:
    # plain broadcasting silently pairs the wrong axes when
    # n_samples == n_lambdas.
    if y_pred.ndim == 1:
        resid = y - y_pred
    else:
        resid = y[:, np.newaxis] - y_pred

    # axis=0 keeps every criterion per-lambda (a scalar for one lambda)
    rss = np.sum(resid**2, axis=0)
    s2 = rss / (n - p)

    self.HKB_ = (p - 2) * s2 / np.sum(self.coef_**2, axis=0)
    self.LW_ = (p - 2) * s2 * n / np.sum(y_pred**2, axis=0)

    # GCV pairs each lambda with the residuals of its own fit
    edf = np.array([np.sum(d2 / (d2 + lam)) for lam in lambdas])
    gcv = rss / (n - edf) ** 2
    self.GCV_ = gcv[0] if single else gcv

    return self
Fit Ridge regression model.
Parameters: X : array-like of shape (n_samples, n_features) Training data y : array-like of shape (n_samples,) Target values
Returns: self : returns an instance of self.
def predict(self, X):
    """Predict using the Ridge regression model.

    Parameters:
        X : array-like of shape (n_samples, n_features)
            Samples to predict for

    Returns:
        y_pred : array-like of shape (n_samples,) for a single lambda;
            one column per lambda when `lambda_` holds several values.
    """
    features = self.cook_test_set(X)

    # Both backend branches of the listing ran the same statements, so a
    # single code path is enough; the backend is only passed to the product.
    if not np.isscalar(self.lambda_):
        per_lambda = [
            mo.safe_sparse_dot(features, coef, backend=self.backend) + self.y_mean_
            for coef in self.coef_.T
        ]
        return jnp.array(per_lambda).T

    linear_part = mo.safe_sparse_dot(features, self.coef_, backend=self.backend)
    return linear_part + self.y_mean_
Predict using the Ridge regression model.
Parameters: X : array-like of shape (n_samples, n_features) Samples to predict for
Returns: y_pred : array-like of shape (n_samples,) Returns predicted values.
class Ridge2Regressor(Ridge2, RegressorMixin):
    """Ridge regression with two regularization parameters, built on Ridge2.

    Parameters:

        n_hidden_features: int
            number of nodes in the hidden layer

        activation_name: str
            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

        a: float
            hyperparameter for 'prelu' or 'elu' activation function

        nodes_sim: str
            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
            'uniform'

        bias: boolean
            whether the hidden layer contains a bias term (True) or not (False)

        dropout: float
            regularization parameter; (random) percentage of nodes dropped out
            of the training

        n_clusters: int
            number of clusters for 'kmeans' or 'gmm' clustering (0 means no
            clustering)

        cluster_encode: bool
            how the cluster variable is treated (default is one-hot);
            if `False`, labels are used without one-hot encoding

        type_clust: str
            clustering method: currently k-means ('kmeans') or Gaussian
            Mixture Model ('gmm')

        type_scaling: a tuple of 3 strings
            scaling methods for inputs, hidden layer, and clustering
            respectively (when relevant). Currently available:
            standardization ('std') or MinMax scaling ('minmax')

        lambda1: float
            regularization parameter on direct link

        lambda2: float
            regularization parameter on hidden layer

        seed: int
            reproducibility seed for nodes_sim=='uniform'

        backend: str
            'cpu' or 'gpu' or 'tpu'

    Attributes:

        beta_: {array-like}
            regression coefficients

        y_mean_: float
            average response

    """

    # construct the object -----

    def __init__(
        self,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        lambda1=0.1,
        lambda2=0.1,
        seed=123,
        backend="cpu",
    ):
        super().__init__(
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            lambda1=lambda1,
            lambda2=lambda2,
            seed=seed,
            backend=backend,
        )

        self.type_fit = "regression"

    def fit(self, X, y, **kwargs):
        """Fit Ridge model to training data (X, y).

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            **kwargs: additional parameters to be passed to
                    self.cook_training_set or self.obj.fit

        Returns:

            self: object

        """
        backend = self.backend

        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

        _, p_X = X.shape
        _, p_Z = scaled_Z.shape

        # Leading columns of scaled_Z: original predictors plus the cluster
        # columns, if any.
        # NOTE(review): the constructor argument is `cluster_encode`, while
        # the attribute read here is `encode_clusters` -- confirm it holds
        # the flag.
        n_features = p_X
        if self.n_clusters > 0:
            n_features += self.n_clusters if self.encode_clusters == True else 1

        direct_part = scaled_Z[:, 0:n_features]
        hidden_part = scaled_Z[:, n_features:p_Z]

        # Penalized cross-product blocks: lambda1 on the leading columns,
        # lambda2 on the remaining ones.
        B = mo.crossprod(x=direct_part, backend=backend) + self.lambda1 * np.eye(
            n_features
        )
        C = mo.crossprod(x=hidden_part, y=direct_part, backend=backend)
        D = mo.crossprod(x=hidden_part, backend=backend) + self.lambda2 * np.eye(
            hidden_part.shape[1]
        )

        # jpinv is only used on Linux/macOS with a non-cpu backend
        use_jax_pinv = (
            platform.system() in ("Linux", "Darwin") and backend != "cpu"
        )
        pseudo_inverse = jpinv if use_jax_pinv else pinv

        B_inv = pseudo_inverse(B)
        W = mo.safe_sparse_dot(a=C, b=B_inv, backend=backend)
        S_mat = D - mo.tcrossprod(x=W, y=C, backend=backend)
        S_inv = pseudo_inverse(S_mat)
        Y = mo.safe_sparse_dot(a=S_inv, b=W, backend=backend)

        # Assemble the full inverse from its four blocks
        upper = mo.cbind(
            x=B_inv + mo.crossprod(x=W, y=Y, backend=backend),
            y=-np.transpose(Y),
            backend=backend,
        )
        lower = mo.cbind(x=-Y, y=S_inv, backend=backend)
        inv = mo.rbind(upper, lower, backend=backend)

        self.beta_ = mo.safe_sparse_dot(
            a=inv,
            b=mo.crossprod(x=scaled_Z, y=centered_y, backend=backend),
            backend=backend,
        )

        return self

    def predict(self, X, **kwargs):
        """Predict test data X.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Vectors to predict, where n_samples is the number
                of samples and n_features is the number of features.

            **kwargs: additional parameters to be passed to
                    self.cook_test_set

        Returns:

            model predictions: {array-like}

        """
        if len(X.shape) != 1:
            fitted = mo.safe_sparse_dot(
                a=self.cook_test_set(X, **kwargs),
                b=self.beta_,
                backend=self.backend,
            )
            return self.y_mean_ + fitted

        # Single observation: stack it on a row of ones to get a 2-row
        # matrix, then keep only the prediction of the first row.
        n_features = X.shape[0]
        padded = mo.rbind(
            x=X.reshape(1, n_features),
            y=np.ones(n_features).reshape(1, n_features),
            backend=self.backend,
        )
        fitted = mo.safe_sparse_dot(
            a=self.cook_test_set(padded, **kwargs),
            b=self.beta_,
            backend=self.backend,
        )
        return (self.y_mean_ + fitted)[0]
Ridge regression with 2 regularization parameters derived from class Ridge
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
lambda1: float
regularization parameter on direct link
lambda2: float
regularization parameter on hidden layer
seed: int
reproducibility seed for nodes_sim=='uniform'
backend: str
'cpu' or 'gpu' or 'tpu'
Attributes:
beta_: {array-like}
regression coefficients
y_mean_: float
average response
def fit(self, X, y, **kwargs):
    """Fit Ridge model to training data (X, y).

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features.

        y: array-like, shape = [n_samples]
            Target values.

        **kwargs: additional parameters to be passed to
                self.cook_training_set or self.obj.fit

    Returns:

        self: object

    """
    backend = self.backend

    centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

    _, p_X = X.shape
    _, p_Z = scaled_Z.shape

    # Leading columns of scaled_Z: original predictors plus the cluster
    # columns, if any.
    # NOTE(review): the comparison below reads `self.encode_clusters`;
    # confirm that attribute holds the one-hot flag.
    n_features = p_X
    if self.n_clusters > 0:
        n_features += self.n_clusters if self.encode_clusters == True else 1

    direct_part = scaled_Z[:, 0:n_features]
    hidden_part = scaled_Z[:, n_features:p_Z]

    # Penalized cross-product blocks: lambda1 on the leading columns,
    # lambda2 on the remaining ones.
    B = mo.crossprod(x=direct_part, backend=backend) + self.lambda1 * np.eye(
        n_features
    )
    C = mo.crossprod(x=hidden_part, y=direct_part, backend=backend)
    D = mo.crossprod(x=hidden_part, backend=backend) + self.lambda2 * np.eye(
        hidden_part.shape[1]
    )

    # jpinv is only used on Linux/macOS with a non-cpu backend
    use_jax_pinv = platform.system() in ("Linux", "Darwin") and backend != "cpu"
    pseudo_inverse = jpinv if use_jax_pinv else pinv

    B_inv = pseudo_inverse(B)
    W = mo.safe_sparse_dot(a=C, b=B_inv, backend=backend)
    S_mat = D - mo.tcrossprod(x=W, y=C, backend=backend)
    S_inv = pseudo_inverse(S_mat)
    Y = mo.safe_sparse_dot(a=S_inv, b=W, backend=backend)

    # Assemble the full inverse from its four blocks
    upper = mo.cbind(
        x=B_inv + mo.crossprod(x=W, y=Y, backend=backend),
        y=-np.transpose(Y),
        backend=backend,
    )
    lower = mo.cbind(x=-Y, y=S_inv, backend=backend)
    inv = mo.rbind(upper, lower, backend=backend)

    self.beta_ = mo.safe_sparse_dot(
        a=inv,
        b=mo.crossprod(x=scaled_Z, y=centered_y, backend=backend),
        backend=backend,
    )

    return self
Fit Ridge model to training data (X, y).
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
def predict(self, X, **kwargs):
    """Predict test data X.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Vectors to predict, where n_samples is the number
            of samples and n_features is the number of features.

        **kwargs: additional parameters to be passed to
                self.cook_test_set

    Returns:

        model predictions: {array-like}

    """
    if len(X.shape) != 1:
        fitted = mo.safe_sparse_dot(
            a=self.cook_test_set(X, **kwargs),
            b=self.beta_,
            backend=self.backend,
        )
        return self.y_mean_ + fitted

    # Single observation: stack it on a row of ones to get a 2-row matrix,
    # then keep only the prediction of the first row.
    n_features = X.shape[0]
    padded = mo.rbind(
        x=X.reshape(1, n_features),
        y=np.ones(n_features).reshape(1, n_features),
        backend=self.backend,
    )
    fitted = mo.safe_sparse_dot(
        a=self.cook_test_set(padded, **kwargs),
        b=self.beta_,
        backend=self.backend,
    )
    return (self.y_mean_ + fitted)[0]
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Test vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
class Ridge2Classifier(Ridge2, ClassifierMixin):
    """Multinomial logit classification with 2 regularization parameters

    Parameters:

        n_hidden_features: int
            number of nodes in the hidden layer

        activation_name: str
            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

        a: float
            hyperparameter for 'prelu' or 'elu' activation function

        nodes_sim: str
            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
            'uniform'

        bias: boolean
            indicates if the hidden layer contains a bias term (True) or not
            (False)

        dropout: float
            regularization parameter; (random) percentage of nodes dropped out
            of the training

        direct_link: boolean
            indicates if the original predictors are included (True) in model's
            fitting or not (False)

        n_clusters: int
            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
                no clustering)

        cluster_encode: bool
            defines how the variable containing clusters is treated (default is one-hot)
            if `False`, then labels are used, without one-hot encoding

        type_clust: str
            type of clustering method: currently k-means ('kmeans') or Gaussian
            Mixture Model ('gmm')

        type_scaling: a tuple of 3 strings
            scaling methods for inputs, hidden layer, and clustering respectively
            (and when relevant).
            Currently available: standardization ('std') or MinMax scaling ('minmax')

        lambda1: float
            regularization parameter on direct link

        lambda2: float
            regularization parameter on hidden layer

        solver: str
            optimization function "L-BFGS-B", "Newton-CG",
            "trust-ncg", "L-BFGS-B-lstsq", "Newton-CG-lstsq",
            "trust-ncg-lstsq" (see scipy.optimize.minimize)
            When using "L-BFGS-B-lstsq", "Newton-CG-lstsq", or "trust-ncg-lstsq",
            the initial value for the optimization is set to the least squares solution

        seed: int
            reproducibility seed for nodes_sim=='uniform'

        backend: str
            "cpu" or "gpu" or "tpu"

    Attributes:

        beta_: {array-like}
            regression coefficients

        classes_: {array-like}
            unique classes in the target variable

        minloglik_: float
            minimum value of the negative log-likelihood

    Examples:

    See also [https://github.com/Techtonique/nnetsauce/blob/master/examples/ridge_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/ridge_classification.py)

    ```python
    import nnetsauce as ns
    import numpy as np
    from sklearn.datasets import load_breast_cancer
    from sklearn.model_selection import train_test_split
    from time import time


    breast_cancer = load_breast_cancer()
    X = breast_cancer.data
    y = breast_cancer.target

    # split data into training set and test set
    np.random.seed(123)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    # create the model with nnetsauce
    fit_obj = ns.Ridge2Classifier(lambda1 = 6.90185578e+04,
                                lambda2 = 3.17392781e+02,
                                n_hidden_features=95,
                                n_clusters=2,
                                dropout = 3.62817383e-01,
                                type_clust = "gmm")

    # fit the model on training set
    start = time()
    fit_obj.fit(X_train, y_train)
    print(f"Elapsed {time() - start}")

    # get the accuracy on test set
    start = time()
    print(fit_obj.score(X_test, y_test))
    print(f"Elapsed {time() - start}")

    # get area under the curve on test set (auc)
    print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
    ```


    """

    # construct the object -----

    def __init__(
        self,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        direct_link=True,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        lambda1=0.1,
        lambda2=0.1,
        solver="L-BFGS-B",
        seed=123,
        backend="cpu",
    ):
        super().__init__(
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            direct_link=direct_link,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            lambda1=lambda1,
            lambda2=lambda2,
            seed=seed,
            backend=backend,
        )

        self.type_fit = "classification"
        self.solver = solver
        self.beta_ = None
        self.classes_ = None
        self.minloglik_ = None

    def loglik(self, X, Y, **kwargs):
        """Build the penalized negative log-likelihood for training data (X, Y).

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            Y: array-like, shape = [n_samples, n_classes]
                One-hot encoded target values.

            **kwargs: forwarded to the inner gradient/hessian helper
                (which does not use them)

        Returns:

            (loglik_func, grad_func, hessian_func): three functions of the
            flattened coefficient vector, suitable for
            scipy.optimize.minimize (`fun`, `jac`, `hess`)

        """

        def loglik_grad_hess(Y, X, B, XB, hessian=True, **kwargs):
            # nobs, n_classes
            n, K = Y.shape

            # total number of covariates
            p = X.shape[1]

            # initial number of covariates
            init_p = p - self.n_hidden_features

            # clip the scores (in place) so that np.exp cannot overflow
            max_double = 709.0
            XB[XB > max_double] = max_double
            exp_XB = np.exp(XB)
            probs = exp_XB / exp_XB.sum(axis=1)[:, None]

            # gradient -----
            # (Y - p) -> (n, K)
            # X -> (n, p)
            # (K, n) %*% (n, p) -> (K, p)
            if hessian is False:
                grad = (
                    -mo.safe_sparse_dot(a=(Y - probs).T, b=X, backend=self.backend) / n
                )
                grad += self.lambda1 * B[0:init_p, :].sum(axis=0)[:, None]
                grad += self.lambda2 * B[init_p:p, :].sum(axis=0)[:, None]

                return grad.flatten()

            # hessian -----
            if hessian is True:
                Kp = K * p
                hess = np.zeros((Kp, Kp), float)
                for k1 in range(K):
                    x_index = range(k1 * p, (k1 + 1) * p)
                    for k2 in range(k1, K):
                        y_index = range(k2 * p, (k2 + 1) * p)
                        H_sub = (
                            -mo.safe_sparse_dot(
                                a=X.T,
                                b=(probs[:, k1] * probs[:, k2])[:, None] * X,
                                backend=self.backend,
                            )
                            / n
                        )  # do not store
                        # same block written at (k1, k2) and (k2, k1)
                        hess[np.ix_(x_index, y_index)] = hess[
                            np.ix_(y_index, x_index)
                        ] = H_sub

                return hess + (self.lambda1 + self.lambda2) * np.identity(Kp)

        # total number of covariates
        p = X.shape[1]

        # initial number of covariates
        init_p = p - self.n_hidden_features

        # log-likelihood (1st return)
        def loglik_func(x):
            # (p, K)
            B = x.reshape(Y.shape[1], p).T

            # (n, K)
            XB = mo.safe_sparse_dot(X, B, backend=self.backend)

            # NOTE(review): logsumexp is taken over the whole matrix (no
            # axis argument) while the gradient uses row-wise softmax
            # probabilities -- confirm this is intended.
            res = -(np.sum(Y * XB, axis=1) - logsumexp(XB)).mean()

            res += (
                0.5
                * self.lambda1
                * mo.squared_norm(B[0:init_p, :], backend=self.backend)
            )
            res += (
                0.5
                * self.lambda2
                * mo.squared_norm(B[init_p:p, :], backend=self.backend)
            )

            return res

        # gradient of log-likelihood
        def grad_func(x):
            # (p, K)
            B = x.reshape(Y.shape[1], p).T

            return loglik_grad_hess(
                Y=Y,
                X=X,
                B=B,
                XB=mo.safe_sparse_dot(X, B, backend=self.backend),
                hessian=False,
                **kwargs
            )

        # hessian of log-likelihood
        def hessian_func(x):
            # (p, K)
            B = x.reshape(Y.shape[1], p).T

            return loglik_grad_hess(
                Y=Y,
                X=X,
                B=B,
                XB=mo.safe_sparse_dot(X, B, backend=self.backend),
                hessian=True,
                **kwargs
            )

        return loglik_func, grad_func, hessian_func

    # newton-cg
    # L-BFGS-B
    def fit(self, X, y, **kwargs):
        """Fit Ridge model to training data (X, y).

        for beta: regression coeffs (beta11, ..., beta1p, ..., betaK1, ..., betaKp)
        for K classes and p covariates.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            **kwargs: additional parameters to be passed to
                    self.cook_training_set or self.obj.fit

        Returns:

            self: object

        Raises:

            ValueError: if `solver` is not one of the supported names.

        """

        assert mx.is_factor(y), "y must contain only integers"

        zero_start = ("L-BFGS-B", "Newton-CG", "trust-ncg")
        lstsq_start = tuple(name + "-lstsq" for name in zero_start)
        # Exact membership in a tuple: testing `solver in "Newton-CG-lstsq"`
        # is a substring test, which also matched plain "Newton-CG" and ran
        # a second optimisation on top of the first one.
        if self.solver not in zero_start + lstsq_start:
            raise ValueError(
                f"Unsupported solver {self.solver!r}; expected one of "
                f"{zero_start + lstsq_start}"
            )

        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

        self.classes_ = np.unique(y)  # for compatibility with sklearn
        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
        self.n_classes = self.n_classes_

        Y = mo.one_hot_encode2(output_y, self.n_classes)

        # optimize for beta, minimize self.loglik (maximize loglik) -----
        loglik_func, grad_func, hessian_func = self.loglik(X=scaled_Z, Y=Y)

        if self.solver in lstsq_start:
            # start from the least squares solution, flattened class by class
            method = self.solver[: -len("-lstsq")]
            x0 = np.linalg.lstsq(scaled_Z, Y, rcond=None)[0].flatten(order="F")
        else:
            method = self.solver
            x0 = np.zeros(scaled_Z.shape[1] * self.n_classes)

        minimize_args = {
            "fun": loglik_func,
            "x0": x0,
            "jac": grad_func,
            "method": method,
        }
        if method in ("Newton-CG", "trust-ncg"):
            # only the second-order methods receive the hessian
            minimize_args["hess"] = hessian_func

        opt = minimize(**minimize_args)
        self.beta_ = opt.x
        self.minloglik_ = opt.fun

        return self

    def predict(self, X, **kwargs):
        """Predict test data X.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Vectors to predict, where n_samples is the number
                of samples and n_features is the number of features.

            **kwargs: additional parameters to be passed to
                    self.cook_test_set

        Returns:

            model predictions: {array-like}
                column index of the largest predicted probability
        """

        return np.argmax(self.predict_proba(X, **kwargs), axis=1)

    def predict_proba(self, X, **kwargs):
        """Predict probabilities for test data X.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Vectors to predict, where n_samples is the number
                of samples and n_features is the number of features.
                A 1-D X is treated as a single observation.

            **kwargs: additional parameters to be passed to
                    self.cook_test_set

        Returns:

            probability estimates for test data: {array-like},
            shape = [n_samples, n_classes] (one row for a 1-D X)

        """
        single_obs = len(X.shape) == 1

        if single_obs:
            # stack the observation on a row of ones to get a 2-row matrix
            n_features = X.shape[0]
            new_X = mo.rbind(
                X.reshape(1, n_features),
                np.ones(n_features).reshape(1, n_features),
            )

            Z = self.cook_test_set(new_X, **kwargs)

        else:
            Z = self.cook_test_set(X, **kwargs)

        # Let reshape infer the number of coefficients per class: beta_ was
        # fitted with one coefficient per column of the transformed training
        # matrix, and X.shape[1] does not exist for a 1-D X.
        ZB = mo.safe_sparse_dot(
            a=Z,
            b=self.beta_.reshape(self.n_classes, -1).T,
            backend=self.backend,
        )

        # subtracting the row maximum leaves the softmax unchanged and
        # prevents overflow in np.exp
        exp_ZB = np.exp(ZB - ZB.max(axis=1)[:, None])
        probs = exp_ZB / exp_ZB.sum(axis=1)[:, None]

        # drop the padding row added for a single observation
        return probs[:1] if single_obs else probs
Multinomial logit classification with 2 regularization parameters
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
direct_link: boolean
indicates if the original predictors are included (True) in model's
fitting or not (False)
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
lambda1: float
regularization parameter on direct link
lambda2: float
regularization parameter on hidden layer
solver: str
optimization function "L-BFGS-B", "Newton-CG",
"trust-ncg", "L-BFGS-B-lstsq", "Newton-CG-lstsq",
"trust-ncg-lstsq" (see scipy.optimize.minimize)
When using "L-BFGS-B-lstsq", "Newton-CG-lstsq", or "trust-ncg-lstsq",
the initial value for the optimization is set to the least squares solution
seed: int
reproducibility seed for nodes_sim=='uniform'
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
beta_: {array-like}
regression coefficients
classes_: {array-like}
unique classes in the target variable
minloglik_: float
minimum value of the negative log-likelihood
Examples:
See also https://github.com/Techtonique/nnetsauce/blob/master/examples/ridge_classification.py
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from time import time
breast_cancer = load_breast_cancer()
X = breast_cancer.data
y = breast_cancer.target
# split data into training set and test set
np.random.seed(123)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
# create the model with nnetsauce
fit_obj = ns.Ridge2Classifier(lambda1 = 6.90185578e+04,
lambda2 = 3.17392781e+02,
n_hidden_features=95,
n_clusters=2,
dropout = 3.62817383e-01,
type_clust = "gmm")
# fit the model on training set
start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")
# get the accuracy on test set
start = time()
print(fit_obj.score(X_test, y_test))
print(f"Elapsed {time() - start}")
# get area under the curve on test set (auc)
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
def fit(self, X, y, **kwargs):
    """Fit Ridge model to training data (X, y).

    for beta: regression coeffs (beta11, ..., beta1p, ..., betaK1, ..., betaKp)
    for K classes and p covariates.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features.

        y: array-like, shape = [n_samples]
            Target values.

        **kwargs: additional parameters to be passed to
                self.cook_training_set or self.obj.fit

    Returns:

        self: object

    Raises:

        AssertionError: if `mx.is_factor(y)` is false.

        ValueError: if `self.solver` is not one of "L-BFGS-B", "Newton-CG",
            "trust-ncg", "L-BFGS-B-lstsq", "Newton-CG-lstsq" or
            "trust-ncg-lstsq".

    """

    assert mx.is_factor(y), "y must contain only integers"

    # Resolve the solver with exact comparisons. A substring test such as
    # `solver in "Newton-CG-lstsq"` is also true for "Newton-CG", which would
    # run a second, least-squares-initialised optimisation and overwrite
    # the result of the requested one.
    lstsq_suffix = "-lstsq"
    base_methods = ("L-BFGS-B", "Newton-CG", "trust-ncg")
    solver = self.solver
    warm_start = isinstance(solver, str) and solver.endswith(lstsq_suffix)
    method = solver[: -len(lstsq_suffix)] if warm_start else solver
    if method not in base_methods:
        supported = base_methods + tuple(m + lstsq_suffix for m in base_methods)
        raise ValueError(
            f"Unknown solver {solver!r}; expected one of {supported}"
        )

    output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

    self.classes_ = np.unique(y)  # for compatibility with sklearn
    self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
    self.n_classes = self.n_classes_

    Y = mo.one_hot_encode2(output_y, self.n_classes)

    # optimize for beta, minimize self.loglik (maximize loglik) -----
    loglik_func, grad_func, hessian_func = self.loglik(X=scaled_Z, Y=Y)

    if warm_start:
        # "-lstsq" variants start from the least squares solution,
        # flattened column-wise.
        x0 = np.linalg.lstsq(scaled_Z, Y, rcond=None)[0].flatten(order="F")
    else:
        x0 = np.zeros(scaled_Z.shape[1] * self.n_classes)

    optim_args = {
        "fun": loglik_func,
        "x0": x0,
        "jac": grad_func,
        "method": method,
    }
    if method in ("Newton-CG", "trust-ncg"):
        # only the second-order methods are given the Hessian
        optim_args["hess"] = hessian_func

    opt = minimize(**optim_args)
    self.beta_ = opt.x
    self.minloglik_ = opt.fun

    return self
Fit Ridge model to training data (X, y).
for beta: regression coeffs (beta11, ..., beta1p, ..., betaK1, ..., betaKp) for K classes and p covariates.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
def predict(self, X, **kwargs):
    """Predict test data X.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Input vectors, where n_samples is the number
            of samples and n_features is the number of features.

        **kwargs: additional parameters, forwarded unchanged to
                self.predict_proba

    Returns:

        model predictions: {array-like}
            for each row, the column index of the largest value
            returned by self.predict_proba
    """
    class_probabilities = self.predict_proba(X, **kwargs)
    return np.argmax(class_probabilities, axis=1)
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
def predict_proba(self, X, **kwargs):
    """Predict probabilities for test data X.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Input vectors, where n_samples is the number
            of samples and n_features is the number of features.
            A 1-dimensional X is stacked on top of a row of ones before
            being passed to self.cook_test_set, so the result then has
            two rows: the first for X, the second for the padding row.

        **kwargs: additional parameters to be passed to
                self.cook_test_set

    Returns:

        probability estimates for test data: {array-like}

    """
    if len(X.shape) == 1:
        n_features = X.shape[0]
        new_X = mo.rbind(
            X.reshape(1, n_features),
            np.ones(n_features).reshape(1, n_features),
        )

        Z = self.cook_test_set(new_X, **kwargs)

    else:
        Z = self.cook_test_set(X, **kwargs)

    # Let reshape infer the number of coefficients per class from beta_
    # itself. Reading X.shape[1] here raised IndexError for 1-D input and
    # hard-coded an assumption about how many columns Z has.
    coefs = self.beta_.reshape(self.n_classes, -1).T

    ZB = mo.safe_sparse_dot(
        a=Z,
        b=coefs,
        backend=self.backend,
    )

    # Subtract the row-wise maximum before exponentiating: the softmax is
    # unchanged mathematically, but np.exp can no longer overflow to inf
    # (which would yield NaN probabilities).
    exp_ZB = np.exp(ZB - np.max(ZB, axis=1, keepdims=True))

    return exp_ZB / exp_ZB.sum(axis=1)[:, None]
Predict probabilities for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
probability estimates for test data: {array-like}
class Ridge2MultitaskClassifier(Ridge2, ClassifierMixin):
    """Multitask Ridge classification with 2 regularization parameters

    Parameters:

        n_hidden_features: int
            number of nodes in the hidden layer

        activation_name: str
            activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'

        a: float
            hyperparameter for 'prelu' or 'elu' activation function

        nodes_sim: str
            type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
            'uniform'

        bias: boolean
            indicates if the hidden layer contains a bias term (True) or not
            (False)

        dropout: float
            regularization parameter; (random) percentage of nodes dropped out
            of the training

        n_clusters: int
            number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
                no clustering)

        cluster_encode: bool
            defines how the variable containing clusters is treated (default is one-hot)
            if `False`, then labels are used, without one-hot encoding

        type_clust: str
            type of clustering method: currently k-means ('kmeans') or Gaussian
            Mixture Model ('gmm')

        type_scaling: a tuple of 3 strings
            scaling methods for inputs, hidden layer, and clustering respectively
            (and when relevant).
            Currently available: standardization ('std') or MinMax scaling ('minmax')

        lambda1: float
            regularization parameter on direct link

        lambda2: float
            regularization parameter on hidden layer

        seed: int
            reproducibility seed for nodes_sim=='uniform'

        backend: str
            "cpu" or "gpu" or "tpu"

    Attributes:

        beta_: {array-like}
            regression coefficients

        classes_: {array-like}
            unique values found in the target passed to `fit`

    Examples:

    See also [https://github.com/Techtonique/nnetsauce/blob/master/examples/ridgemtask_classification.py](https://github.com/Techtonique/nnetsauce/blob/master/examples/ridgemtask_classification.py)

    ```python
    import nnetsauce as ns
    import numpy as np
    from sklearn.datasets import load_breast_cancer
    from sklearn.model_selection import train_test_split
    from sklearn import metrics
    from time import time

    breast_cancer = load_breast_cancer()
    Z = breast_cancer.data
    t = breast_cancer.target
    np.random.seed(123)
    X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)

    fit_obj = ns.Ridge2MultitaskClassifier(n_hidden_features=int(9.83730469e+01),
                                    dropout=4.31054687e-01,
                                    n_clusters=int(1.71484375e+00),
                                    lambda1=1.24023438e+01, lambda2=7.30263672e+03)

    start = time()
    fit_obj.fit(X_train, y_train)
    print(f"Elapsed {time() - start}")

    print(fit_obj.score(X_test, y_test))
    print(fit_obj.score(X_test, y_test, scoring="roc_auc"))

    start = time()
    preds = fit_obj.predict(X_test)
    print(f"Elapsed {time() - start}")
    print(metrics.classification_report(preds, y_test))
    ```

    """

    # construct the object -----

    def __init__(
        self,
        n_hidden_features=5,
        activation_name="relu",
        a=0.01,
        nodes_sim="sobol",
        bias=True,
        dropout=0,
        n_clusters=2,
        cluster_encode=True,
        type_clust="kmeans",
        type_scaling=("std", "std", "std"),
        lambda1=0.1,
        lambda2=0.1,
        seed=123,
        backend="cpu",
    ):
        super().__init__(
            n_hidden_features=n_hidden_features,
            activation_name=activation_name,
            a=a,
            nodes_sim=nodes_sim,
            bias=bias,
            dropout=dropout,
            n_clusters=n_clusters,
            cluster_encode=cluster_encode,
            type_clust=type_clust,
            type_scaling=type_scaling,
            lambda1=lambda1,
            lambda2=lambda2,
            seed=seed,
            backend=backend,
        )

        self.type_fit = "classification"

    def fit(self, X, y, **kwargs):
        """Fit Ridge model to training data (X, y).

        The coefficients are obtained in closed form: the regularized Gram
        matrix of the transformed predictors is inverted block-wise, with
        `lambda1` added to the diagonal of the leading block and `lambda2`
        to the diagonal of the trailing block, and then applied to the
        cross-product of the transformed predictors with the one-hot
        encoded response.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Training vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            **kwargs: additional parameters to be passed to
                    self.cook_training_set or self.obj.fit

        Returns:

            self: object

        """

        sys_platform = platform.system()

        assert mx.is_factor(y), "y must contain only integers"

        self.classes_ = np.unique(y)  # for compatibility with sklearn
        self.n_classes_ = len(self.classes_)  # for compatibility with sklearn

        output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

        p_X = X.shape[1]
        p_Z = scaled_Z.shape[1]

        self.n_classes = self.n_classes_

        # multitask response
        Y = mo.one_hot_encode2(output_y, self.n_classes)

        # Number of leading columns of scaled_Z penalized by lambda1; the
        # remaining columns are penalized by lambda2.
        # NOTE(review): the flag is read under its constructor name,
        # `cluster_encode`; nothing visible here assigns an attribute named
        # `encode_clusters`. Confirm the base class stores the flag as
        # `self.cluster_encode`.
        if self.n_clusters > 0:
            if self.cluster_encode:
                n_features = p_X + self.n_clusters
            else:
                n_features = p_X + 1
        else:
            n_features = p_X

        X_ = scaled_Z[:, 0:n_features]
        Phi_X_ = scaled_Z[:, n_features:p_Z]

        B = mo.crossprod(x=X_, backend=self.backend) + self.lambda1 * np.eye(
            X_.shape[1]
        )
        C = mo.crossprod(x=Phi_X_, y=X_, backend=self.backend)
        D = mo.crossprod(
            x=Phi_X_, backend=self.backend
        ) + self.lambda2 * np.eye(Phi_X_.shape[1])

        # jpinv is only used on Linux/macOS with a non-"cpu" backend;
        # everywhere else pinv is used.
        use_jpinv = (
            sys_platform in ("Linux", "Darwin") and self.backend != "cpu"
        )
        pseudo_inverse = jpinv if use_jpinv else pinv

        B_inv = pseudo_inverse(B)

        W = mo.safe_sparse_dot(a=C, b=B_inv, backend=self.backend)
        S_mat = D - mo.tcrossprod(x=W, y=C, backend=self.backend)

        S_inv = pseudo_inverse(S_mat)

        Y2 = mo.safe_sparse_dot(a=S_inv, b=W, backend=self.backend)
        inv = mo.rbind(
            mo.cbind(
                x=B_inv + mo.crossprod(x=W, y=Y2, backend=self.backend),
                y=-np.transpose(Y2),
                backend=self.backend,
            ),
            mo.cbind(x=-Y2, y=S_inv, backend=self.backend),
            backend=self.backend,
        )

        self.beta_ = mo.safe_sparse_dot(
            a=inv,
            b=mo.crossprod(x=scaled_Z, y=Y, backend=self.backend),
            backend=self.backend,
        )
        return self

    def predict(self, X, **kwargs):
        """Predict test data X.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Input vectors, where n_samples is the number
                of samples and n_features is the number of features.

            **kwargs: additional parameters to be passed to
                    self.cook_test_set

        Returns:

            model predictions: {array-like}
                for each row, the column index of the largest value
                returned by self.predict_proba

        """

        return np.argmax(self.predict_proba(X, **kwargs), axis=1)

    def predict_proba(self, X, **kwargs):
        """Predict probabilities for test data X.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Input vectors, where n_samples is the number
                of samples and n_features is the number of features.
                A 1-dimensional X is stacked on top of a row of ones before
                being passed to self.cook_test_set, so the result then has
                two rows: the first for X, the second for the padding row.

            **kwargs: additional parameters to be passed to
                    self.cook_test_set

        Returns:

            probability estimates for test data: {array-like}

        """

        if len(X.shape) == 1:
            n_features = X.shape[0]
            new_X = mo.rbind(
                x=X.reshape(1, n_features),
                y=np.ones(n_features).reshape(1, n_features),
                backend=self.backend,
            )

            Z = self.cook_test_set(new_X, **kwargs)

        else:
            Z = self.cook_test_set(X, **kwargs)

        ZB = mo.safe_sparse_dot(a=Z, b=self.beta_, backend=self.backend)

        # row-wise softmax of the linear scores
        exp_ZB = np.exp(ZB)

        return exp_ZB / exp_ZB.sum(axis=1)[:, None]

    def score(self, X, y, scoring=None):
        """Scoring function for classification.

        Args:

            X: {array-like}, shape = [n_samples, n_features]
                Input vectors, where n_samples is the number
                of samples and n_features is the number of features.

            y: array-like, shape = [n_samples]
                Target values.

            scoring: str
                scoring method (default is accuracy). One of "accuracy",
                "f1", "precision", "recall", "roc_auc", "log_loss",
                "balanced_accuracy", "average_precision",
                "neg_brier_score", "neg_log_loss".

        Returns:

            score: float
                None is returned when `scoring` is not one of the
                names listed above.
        """

        if scoring is None:
            scoring = "accuracy"

        # (scoring name, skm2 function, uses probabilities, negate result)
        metric_table = (
            ("accuracy", "accuracy_score", False, False),
            ("f1", "f1_score", False, False),
            ("precision", "precision_score", False, False),
            ("recall", "recall_score", False, False),
            ("roc_auc", "roc_auc_score", False, False),
            ("log_loss", "log_loss", True, False),
            ("balanced_accuracy", "balanced_accuracy_score", False, False),
            ("average_precision", "average_precision_score", False, False),
            ("neg_brier_score", "brier_score_loss", True, True),
            ("neg_log_loss", "log_loss", True, True),
        )

        for key, metric_name, uses_proba, negate in metric_table:
            if scoring == key:
                metric = getattr(skm2, metric_name)
                if uses_proba:
                    estimate = self.predict_proba(X)
                else:
                    estimate = self.predict(X)
                value = metric(y, estimate)
                return -value if negate else value

        return None
Multitask Ridge classification with 2 regularization parameters
Parameters:
n_hidden_features: int
number of nodes in the hidden layer
activation_name: str
activation function: 'relu', 'tanh', 'sigmoid', 'prelu' or 'elu'
a: float
hyperparameter for 'prelu' or 'elu' activation function
nodes_sim: str
type of simulation for the nodes: 'sobol', 'hammersley', 'halton',
'uniform'
bias: boolean
indicates if the hidden layer contains a bias term (True) or not
(False)
dropout: float
regularization parameter; (random) percentage of nodes dropped out
of the training
n_clusters: int
number of clusters for 'kmeans' or 'gmm' clustering (could be 0:
no clustering)
cluster_encode: bool
defines how the variable containing clusters is treated (default is one-hot)
if `False`, then labels are used, without one-hot encoding
type_clust: str
type of clustering method: currently k-means ('kmeans') or Gaussian
Mixture Model ('gmm')
type_scaling: a tuple of 3 strings
scaling methods for inputs, hidden layer, and clustering respectively
(and when relevant).
Currently available: standardization ('std') or MinMax scaling ('minmax')
lambda1: float
regularization parameter on direct link
lambda2: float
regularization parameter on hidden layer
seed: int
reproducibility seed for nodes_sim=='uniform'
backend: str
"cpu" or "gpu" or "tpu"
Attributes:
beta_: {array-like}
regression coefficients
Examples:
See also https://github.com/Techtonique/nnetsauce/blob/master/examples/ridgemtask_classification.py
import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn import metrics
from time import time
breast_cancer = load_breast_cancer()
Z = breast_cancer.data
t = breast_cancer.target
np.random.seed(123)
X_train, X_test, y_train, y_test = train_test_split(Z, t, test_size=0.2)
fit_obj = ns.Ridge2MultitaskClassifier(n_hidden_features=int(9.83730469e+01),
dropout=4.31054687e-01,
n_clusters=int(1.71484375e+00),
lambda1=1.24023438e+01, lambda2=7.30263672e+03)
start = time()
fit_obj.fit(X_train, y_train)
print(f"Elapsed {time() - start}")
print(fit_obj.score(X_test, y_test))
print(fit_obj.score(X_test, y_test, scoring="roc_auc"))
start = time()
preds = fit_obj.predict(X_test)
print(f"Elapsed {time() - start}")
print(metrics.classification_report(preds, y_test))
def fit(self, X, y, **kwargs):
    """Fit Ridge model to training data (X, y).

    The coefficients are obtained in closed form: the regularized Gram
    matrix of the transformed predictors is inverted block-wise, with
    `lambda1` added to the diagonal of the leading block and `lambda2`
    to the diagonal of the trailing block, and then applied to the
    cross-product of the transformed predictors with the one-hot
    encoded response.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number
            of samples and n_features is the number of features.

        y: array-like, shape = [n_samples]
            Target values.

        **kwargs: additional parameters to be passed to
                self.cook_training_set or self.obj.fit

    Returns:

        self: object

    """

    sys_platform = platform.system()

    assert mx.is_factor(y), "y must contain only integers"

    self.classes_ = np.unique(y)  # for compatibility with sklearn
    self.n_classes_ = len(self.classes_)  # for compatibility with sklearn

    output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

    p_X = X.shape[1]
    p_Z = scaled_Z.shape[1]

    self.n_classes = self.n_classes_

    # multitask response
    Y = mo.one_hot_encode2(output_y, self.n_classes)

    # Number of leading columns of scaled_Z penalized by lambda1; the
    # remaining columns are penalized by lambda2.
    # NOTE(review): the flag is read under its constructor name,
    # `cluster_encode`; nothing visible here assigns an attribute named
    # `encode_clusters`. Confirm the base class stores the flag as
    # `self.cluster_encode`.
    if self.n_clusters > 0:
        if self.cluster_encode:
            n_features = p_X + self.n_clusters
        else:
            n_features = p_X + 1
    else:
        n_features = p_X

    X_ = scaled_Z[:, 0:n_features]
    Phi_X_ = scaled_Z[:, n_features:p_Z]

    B = mo.crossprod(x=X_, backend=self.backend) + self.lambda1 * np.eye(
        X_.shape[1]
    )
    C = mo.crossprod(x=Phi_X_, y=X_, backend=self.backend)
    D = mo.crossprod(x=Phi_X_, backend=self.backend) + self.lambda2 * np.eye(
        Phi_X_.shape[1]
    )

    # jpinv is only used on Linux/macOS with a non-"cpu" backend;
    # everywhere else pinv is used.
    use_jpinv = sys_platform in ("Linux", "Darwin") and self.backend != "cpu"
    pseudo_inverse = jpinv if use_jpinv else pinv

    B_inv = pseudo_inverse(B)

    W = mo.safe_sparse_dot(a=C, b=B_inv, backend=self.backend)
    S_mat = D - mo.tcrossprod(x=W, y=C, backend=self.backend)

    S_inv = pseudo_inverse(S_mat)

    Y2 = mo.safe_sparse_dot(a=S_inv, b=W, backend=self.backend)
    inv = mo.rbind(
        mo.cbind(
            x=B_inv + mo.crossprod(x=W, y=Y2, backend=self.backend),
            y=-np.transpose(Y2),
            backend=self.backend,
        ),
        mo.cbind(x=-Y2, y=S_inv, backend=self.backend),
        backend=self.backend,
    )

    self.beta_ = mo.safe_sparse_dot(
        a=inv,
        b=mo.crossprod(x=scaled_Z, y=Y, backend=self.backend),
        backend=self.backend,
    )
    return self
Fit Ridge model to training data (X, y).
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
**kwargs: additional parameters to be passed to
self.cook_training_set or self.obj.fit
Returns:
self: object
def predict(self, X, **kwargs):
    """Predict test data X.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Input vectors, where n_samples is the number
            of samples and n_features is the number of features.

        **kwargs: additional parameters, forwarded unchanged to
                self.predict_proba

    Returns:

        model predictions: {array-like}
            for each row, the column index of the largest value
            returned by self.predict_proba

    """
    probabilities = self.predict_proba(X, **kwargs)
    winning_columns = np.argmax(probabilities, axis=1)
    return winning_columns
Predict test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
model predictions: {array-like}
def predict_proba(self, X, **kwargs):
    """Predict probabilities for test data X.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Input vectors, where n_samples is the number
            of samples and n_features is the number of features.
            A 1-dimensional X is stacked on top of a row of ones before
            being passed to self.cook_test_set, so the result then has
            two rows: the first for X, the second for the padding row.

        **kwargs: additional parameters to be passed to
                self.cook_test_set

    Returns:

        probability estimates for test data: {array-like}

    """
    inputs = X
    if len(X.shape) == 1:
        # promote a single observation to a 2-row matrix
        width = X.shape[0]
        sample_row = X.reshape(1, width)
        ones_row = np.ones(width).reshape(1, width)
        inputs = mo.rbind(x=sample_row, y=ones_row, backend=self.backend)

    design = self.cook_test_set(inputs, **kwargs)

    scores = mo.safe_sparse_dot(a=design, b=self.beta_, backend=self.backend)

    # row-wise softmax of the linear scores
    unnormalized = np.exp(scores)
    row_totals = unnormalized.sum(axis=1)
    return unnormalized / row_totals[:, None]
Predict probabilities for test data X.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
**kwargs: additional parameters to be passed to
self.cook_test_set
Returns:
probability estimates for test data: {array-like}
def score(self, X, y, scoring=None):
    """Scoring function for classification.

    Args:

        X: {array-like}, shape = [n_samples, n_features]
            Input vectors, where n_samples is the number
            of samples and n_features is the number of features.

        y: array-like, shape = [n_samples]
            Target values.

        scoring: str
            scoring method (default is accuracy). One of "accuracy",
            "f1", "precision", "recall", "roc_auc", "log_loss",
            "balanced_accuracy", "average_precision",
            "neg_brier_score", "neg_log_loss".

    Returns:

        score: float
            None is returned when `scoring` is not one of the
            names listed above.
    """
    requested = "accuracy" if scoring is None else scoring

    # (scoring name, skm2 function, uses probabilities, negate result)
    metric_table = (
        ("accuracy", "accuracy_score", False, False),
        ("f1", "f1_score", False, False),
        ("precision", "precision_score", False, False),
        ("recall", "recall_score", False, False),
        ("roc_auc", "roc_auc_score", False, False),
        ("log_loss", "log_loss", True, False),
        ("balanced_accuracy", "balanced_accuracy_score", False, False),
        ("average_precision", "average_precision_score", False, False),
        ("neg_brier_score", "brier_score_loss", True, True),
        ("neg_log_loss", "log_loss", True, True),
    )

    for key, metric_name, uses_proba, negate in metric_table:
        if requested == key:
            metric = getattr(skm2, metric_name)
            if uses_proba:
                estimate = self.predict_proba(X)
            else:
                estimate = self.predict(X)
            value = metric(y, estimate)
            return -value if negate else value

    return None
Scoring function for classification.
Args:
X: {array-like}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number
of samples and n_features is the number of features.
y: array-like, shape = [n_samples]
Target values.
scoring: str
scoring method (default is accuracy)
Returns:
score: float
class SubSampler:
    """Subsampling class.

    Attributes:

        y: array-like, shape = [n_samples]
            Target values.

        row_sample: double
            subsampling fraction; when `n_samples` is given it is
            set to n_samples / len(y)

        n_samples: int
            subsampling by using the number of rows (supersedes row_sample)

        seed: int
            reproducibility seed

        n_jobs: int
            number of jobs to run in parallel

        verbose: bool
            print progress messages and bars

        indices:
            result of the last call to `subsample` (None before any call)
    """

    def __init__(
        self,
        y,
        row_sample=0.8,
        n_samples=None,
        seed=123,
        n_jobs=None,
        verbose=False,
    ):
        self.y = y
        self.n_samples = n_samples

        if n_samples is not None:
            # an explicit row count takes precedence over the fraction
            assert n_samples < len(y), "'n_samples' must be < len(y)"
            self.row_sample = n_samples / len(y)
        else:
            assert (
                0 <= row_sample < 1
            ), "'row_sample' must be provided, plus < 1 and >= 0"
            self.row_sample = row_sample

        self.seed = seed
        self.indices = None
        self.n_jobs = n_jobs
        self.verbose = verbose

    def subsample(self):
        """Compute, store and return the indices of the subsampled input data.

        Examples:

        <ul>
            <li> <a href="https://github.com/Techtonique/nnetsauce/blob/master/nnetsauce/demo/thierrymoudiki_20240105_subsampling.ipynb">20240105_subsampling.ipynb</a> </li>
            <li> <a href="https://github.com/Techtonique/nnetsauce/blob/master/nnetsauce/demo/thierrymoudiki_20240131_subsampling_nsamples.ipynb">20240131_subsampling_nsamples.ipynb</a> </li>
        </ul>

        """
        sampler_args = {
            "y": self.y,
            "row_sample": self.row_sample,
            "seed": self.seed,
            "n_jobs": self.n_jobs,
            "verbose": self.verbose,
        }
        self.indices = dosubsample(**sampler_args)
        return self.indices
Subsampling class.
Attributes:
y: array-like, shape = [n_samples] Target values.
row_sample: double subsampling fraction
n_samples: int subsampling by using the number of rows (supersedes row_sample)
seed: int reproducibility seed
n_jobs: int number of jobs to run in parallel
verbose: bool print progress messages and bars
def subsample(self):
    """Compute, store and return the indices of the subsampled input data.

    The result of `dosubsample` is kept in `self.indices` and also
    returned.

    Examples:

    <ul>
        <li> <a href="https://github.com/Techtonique/nnetsauce/blob/master/nnetsauce/demo/thierrymoudiki_20240105_subsampling.ipynb">20240105_subsampling.ipynb</a> </li>
        <li> <a href="https://github.com/Techtonique/nnetsauce/blob/master/nnetsauce/demo/thierrymoudiki_20240131_subsampling_nsamples.ipynb">20240131_subsampling_nsamples.ipynb</a> </li>
    </ul>

    """
    sampler_args = {
        "y": self.y,
        "row_sample": self.row_sample,
        "seed": self.seed,
        "n_jobs": self.n_jobs,
        "verbose": self.verbose,
    }
    self.indices = dosubsample(**sampler_args)
    return self.indices
Returns indices of subsampled input data.
Examples: